diff --git "a/gemma3_monolithic_full_lut6.mlmodelc/model.mil" "b/gemma3_monolithic_full_lut6.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/gemma3_monolithic_full_lut6.mlmodelc/model.mil" @@ -0,0 +1,28057 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func infer(tensor causal_mask, tensor current_pos, tensor input_ids, state> model_model_kv_cache_global, state> model_model_kv_cache_local, tensor position_ids) { + tensor model_model_embed_tokens_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301990016))))[name = string("model_model_embed_tokens_weight_palettized")]; + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335544512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336429312))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336462144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336683392))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336691648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336912896))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336921152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337805952))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337838784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338060032))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338068288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338289536))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338297792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339182592))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339215424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339436672))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339444928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339666176))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339674432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340559232))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340592064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340813312))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340821568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341042816))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341051072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341935872))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341968704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342189952))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342198208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342419456))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342427712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343312512))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343345344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343566592))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343574848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343796096))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343804352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344689152))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344721984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344943232))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344951488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345172736))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345180992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346065792))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346098624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346319872))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346328128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346549376))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346557632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347442432))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347475264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347696512))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347926016))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347934272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348819072))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348851904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349073152))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349081408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349302656))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349310912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350195712))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350228544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350449792))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350458048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350679296))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350687552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351572352))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351605184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351826432))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351834688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352055936))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352064192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352948992))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352981824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353203072))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353211328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353432576))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353440832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354325632))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354358464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354579712))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354587968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354809216))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354817472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355702272))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355735104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355956352))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355964608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356185856))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356194112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357078912))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357111744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357332992))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357341248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357562496))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357570752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358455552))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358488384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358709632))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358717888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358939136))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358947392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359832192))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359865024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360086272))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360094528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360315776))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360324032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361208832))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361241664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361462912))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361471168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361692416))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361700672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362585472))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362618304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362839552))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362847808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363069056))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363077312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363962112))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363994944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364216192))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364224448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364445696))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364453952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365338752))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365371584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365592832))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365601088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365822336))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365830592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366715392))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366748224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366969472))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366977728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367198976))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367207232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368092032))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368124864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368346112))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368354368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368575616))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368583872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369468672))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369501504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369722752))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369731008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369952256))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369960512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370845312))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370878144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371099392))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371107648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371328896))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; + int32 hidden_states_1_batch_dims_0 = const()[name = string("hidden_states_1_batch_dims_0"), val = int32(0)]; + bool hidden_states_1_validate_indices_0 = const()[name = string("hidden_states_1_validate_indices_0"), val = bool(false)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(262144)]; + tensor add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; + tensor greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; + int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(262144)]; + tensor add_0_1 = add(x = select_0, y = slice_by_index_0_1)[name = string("add_0_1")]; + tensor select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; + int32 hidden_states_1_axis_0 = const()[name = string("hidden_states_1_axis_0"), val = int32(0)]; + tensor hidden_states_1 = gather(axis = hidden_states_1_axis_0, batch_dims = hidden_states_1_batch_dims_0, indices = select_0_1, validate_indices = hidden_states_1_validate_indices_0, x = model_model_embed_tokens_weight_palettized)[name = string("hidden_states_1")]; + fp16 var_1691_to_fp16 = const()[name = string("op_1691_to_fp16"), val = fp16(0x1.0f8p+5)]; + tensor hidden_states_3_cast_fp16 = mul(x = hidden_states_1, y = var_1691_to_fp16)[name = string("hidden_states_3_cast_fp16")]; + int32 var_1706_axis_0 = const()[name = string("op_1706_axis_0"), val = int32(1)]; + int32 var_1706_batch_dims_0 = const()[name = string("op_1706_batch_dims_0"), val = int32(0)]; + bool var_1706_validate_indices_0 = const()[name = string("op_1706_validate_indices_0"), val = bool(false)]; + tensor var_1698_to_fp16 = const()[name = string("op_1698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371337152)))]; + string current_pos_to_uint16_dtype_0 = const()[name = string("current_pos_to_uint16_dtype_0"), val = string("uint16")]; + tensor current_pos_to_uint16 = cast(dtype = current_pos_to_uint16_dtype_0, x = current_pos)[name = string("cast_2")]; + tensor var_1706_cast_fp16_cast_uint16 = gather(axis = var_1706_axis_0, batch_dims = var_1706_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_1706_validate_indices_0, x = var_1698_to_fp16)[name = string("op_1706_cast_fp16_cast_uint16")]; + tensor var_1711 = const()[name = string("op_1711"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_1711, x = var_1706_cast_fp16_cast_uint16)[name = string("sin_1_cast_fp16")]; + int32 var_1721_axis_0 = const()[name = string("op_1721_axis_0"), val = int32(1)]; + int32 var_1721_batch_dims_0 = const()[name = string("op_1721_batch_dims_0"), val = int32(0)]; + bool var_1721_validate_indices_0 = const()[name = string("op_1721_validate_indices_0"), val = bool(false)]; + tensor var_1713_to_fp16 = const()[name = string("op_1713_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375531520)))]; + tensor var_1721_cast_fp16_cast_uint16 = gather(axis = var_1721_axis_0, batch_dims = var_1721_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_1721_validate_indices_0, x = var_1713_to_fp16)[name = string("op_1721_cast_fp16_cast_uint16")]; + tensor var_1726 = const()[name = string("op_1726"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_1726, x = var_1721_cast_fp16_cast_uint16)[name = string("cos_1_cast_fp16")]; + int32 var_1747 = const()[name = string("op_1747"), val = int32(-1)]; + fp16 const_0_promoted = const()[name = string("const_0_promoted"), val = fp16(-0x1p+0)]; + tensor var_1749 = mul(x = hidden_states_3_cast_fp16, y = const_0_promoted)[name = string("op_1749")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1 = concat(axis = var_1747, interleave = input_1_interleave_0, values = (hidden_states_3_cast_fp16, var_1749))[name = string("input_1")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_1744_to_fp16 = const()[name = string("op_1744_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1744_to_fp16, x = input_1)[name = string("normed_1_cast_fp16")]; + tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_3 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3")]; + tensor var_1763_to_fp16 = const()[name = string("op_1763_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379725888)))]; + tensor hidden_states_7_cast_fp16 = mul(x = normed_3, y = var_1763_to_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_1768 = const()[name = string("op_1768"), val = tensor([0, 2, 1])]; + tensor var_1771_axes_0 = const()[name = string("op_1771_axes_0"), val = tensor([2])]; + tensor var_1769_cast_fp16 = transpose(perm = var_1768, x = hidden_states_7_cast_fp16)[name = string("transpose_172")]; + tensor var_1771_cast_fp16 = expand_dims(axes = var_1771_axes_0, x = var_1769_cast_fp16)[name = string("op_1771_cast_fp16")]; + string var_1787_pad_type_0 = const()[name = string("op_1787_pad_type_0"), val = string("valid")]; + tensor var_1787_strides_0 = const()[name = string("op_1787_strides_0"), val = tensor([1, 1])]; + tensor var_1787_pad_0 = const()[name = string("op_1787_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1787_dilations_0 = const()[name = string("op_1787_dilations_0"), val = tensor([1, 1])]; + int32 var_1787_groups_0 = const()[name = string("op_1787_groups_0"), val = int32(1)]; + tensor var_1787 = conv(dilations = var_1787_dilations_0, groups = var_1787_groups_0, pad = var_1787_pad_0, pad_type = var_1787_pad_type_0, strides = var_1787_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1771_cast_fp16)[name = string("op_1787")]; + tensor var_1792 = const()[name = string("op_1792"), val = tensor([1, 4, 1, 256])]; + tensor var_1793 = reshape(shape = var_1792, x = var_1787)[name = string("op_1793")]; + string var_1809_pad_type_0 = const()[name = string("op_1809_pad_type_0"), val = string("valid")]; + tensor var_1809_strides_0 = const()[name = string("op_1809_strides_0"), val = tensor([1, 1])]; + tensor var_1809_pad_0 = const()[name = string("op_1809_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1809_dilations_0 = const()[name = string("op_1809_dilations_0"), val = tensor([1, 1])]; + int32 var_1809_groups_0 = const()[name = string("op_1809_groups_0"), val = int32(1)]; + tensor var_1809 = conv(dilations = var_1809_dilations_0, groups = var_1809_groups_0, pad = var_1809_pad_0, pad_type = var_1809_pad_type_0, strides = var_1809_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1771_cast_fp16)[name = string("op_1809")]; + tensor var_1814 = const()[name = string("op_1814"), val = tensor([1, 1, 1, 256])]; + tensor var_1815 = reshape(shape = var_1814, x = var_1809)[name = string("op_1815")]; + string var_1831_pad_type_0 = const()[name = string("op_1831_pad_type_0"), val = string("valid")]; + tensor var_1831_strides_0 = const()[name = string("op_1831_strides_0"), val = tensor([1, 1])]; + tensor var_1831_pad_0 = const()[name = string("op_1831_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1831_dilations_0 = const()[name = string("op_1831_dilations_0"), val = tensor([1, 1])]; + int32 var_1831_groups_0 = const()[name = string("op_1831_groups_0"), val = int32(1)]; + tensor var_1831 = conv(dilations = var_1831_dilations_0, groups = var_1831_groups_0, pad = var_1831_pad_0, pad_type = var_1831_pad_type_0, strides = var_1831_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1771_cast_fp16)[name = string("op_1831")]; + tensor var_1836 = const()[name = string("op_1836"), val = tensor([1, 1, 1, 256])]; + tensor var_1837 = reshape(shape = var_1836, x = var_1831)[name = string("op_1837")]; + int32 var_1852 = const()[name = string("op_1852"), val = int32(-1)]; + fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; + tensor var_1854 = mul(x = var_1793, y = const_4_promoted)[name = string("op_1854")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_1852, interleave = input_5_interleave_0, values = (var_1793, var_1854))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_1849_to_fp16 = const()[name = string("op_1849_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1849_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; + tensor var_1868_to_fp16 = const()[name = string("op_1868_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379728256)))]; + tensor q_1_cast_fp16 = mul(x = normed_7, y = var_1868_to_fp16)[name = string("q_1_cast_fp16")]; + int32 var_1879 = const()[name = string("op_1879"), val = int32(-1)]; + fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; + tensor var_1881 = mul(x = var_1815, y = const_8_promoted)[name = string("op_1881")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_1879, interleave = input_7_interleave_0, values = (var_1815, var_1881))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_1876_to_fp16 = const()[name = string("op_1876_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1876_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; + tensor var_1895_to_fp16 = const()[name = string("op_1895_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379728832)))]; + tensor k_1_cast_fp16 = mul(x = normed_11, y = var_1895_to_fp16)[name = string("k_1_cast_fp16")]; + tensor var_1897_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1897_cast_fp16")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1918_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1918_cast_fp16")]; + int32 var_1920 = const()[name = string("op_1920"), val = int32(-1)]; + bool var_1921_interleave_0 = const()[name = string("op_1921_interleave_0"), val = bool(false)]; + tensor var_1921_cast_fp16 = concat(axis = var_1920, interleave = var_1921_interleave_0, values = (var_1918_cast_fp16, x1_1_cast_fp16))[name = string("op_1921_cast_fp16")]; + tensor var_1922_cast_fp16 = mul(x = var_1921_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1922_cast_fp16")]; + tensor query_states_1_cast_fp16 = add(x = var_1897_cast_fp16, y = var_1922_cast_fp16)[name = string("query_states_1_cast_fp16")]; + tensor var_1925_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1925_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1946_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1946_cast_fp16")]; + int32 var_1948 = const()[name = string("op_1948"), val = int32(-1)]; + bool var_1949_interleave_0 = const()[name = string("op_1949_interleave_0"), val = bool(false)]; + tensor var_1949_cast_fp16 = concat(axis = var_1948, interleave = var_1949_interleave_0, values = (var_1946_cast_fp16, x1_3_cast_fp16))[name = string("op_1949_cast_fp16")]; + tensor var_1950_cast_fp16 = mul(x = var_1949_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1950_cast_fp16")]; + tensor key_states_1_cast_fp16 = add(x = var_1925_cast_fp16, y = var_1950_cast_fp16)[name = string("key_states_1_cast_fp16")]; + int32 var_1954 = const()[name = string("op_1954"), val = int32(1)]; + tensor var_1955 = add(x = current_pos, y = var_1954)[name = string("op_1955")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_local)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1955, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_1_stride_0, update = key_states_1_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_0")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([22])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([23])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1955, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_2_stride_0, update = var_1837, x = coreml_update_state_52)[name = string("model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_1")]; + tensor var_2005_begin_0 = const()[name = string("op_2005_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2005_end_0 = const()[name = string("op_2005_end_0"), val = tensor([1, 1, 512, 256])]; + tensor var_2005_end_mask_0 = const()[name = string("op_2005_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2005_cast_fp16 = slice_by_index(begin = var_2005_begin_0, end = var_2005_end_0, end_mask = var_2005_end_mask_0, x = coreml_update_state_53)[name = string("op_2005_cast_fp16")]; + tensor var_2012_begin_0 = const()[name = string("op_2012_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_2012_end_0 = const()[name = string("op_2012_end_0"), val = tensor([23, 1, 512, 256])]; + tensor var_2012_end_mask_0 = const()[name = string("op_2012_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2012_cast_fp16 = slice_by_index(begin = var_2012_begin_0, end = var_2012_end_0, end_mask = var_2012_end_mask_0, x = coreml_update_state_53)[name = string("op_2012_cast_fp16")]; + tensor var_2049 = const()[name = string("op_2049"), val = tensor([1, 4, 1, 1])]; + tensor x_5_cast_fp16 = tile(reps = var_2049, x = var_2005_cast_fp16)[name = string("x_5_cast_fp16")]; + tensor var_2069 = const()[name = string("op_2069"), val = tensor([1, 4, 1, 1])]; + tensor x_11_cast_fp16 = tile(reps = var_2069, x = var_2012_cast_fp16)[name = string("x_11_cast_fp16")]; + bool var_2096_transpose_x_1 = const()[name = string("op_2096_transpose_x_1"), val = bool(false)]; + bool var_2096_transpose_y_1 = const()[name = string("op_2096_transpose_y_1"), val = bool(true)]; + tensor var_2096 = matmul(transpose_x = var_2096_transpose_x_1, transpose_y = var_2096_transpose_y_1, x = query_states_1_cast_fp16, y = x_5_cast_fp16)[name = string("op_2096")]; + fp16 var_2097_to_fp16 = const()[name = string("op_2097_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_2096, y = var_2097_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor var_2129_begin_0 = const()[name = string("op_2129_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2129_end_0 = const()[name = string("op_2129_end_0"), val = tensor([1, 1, 1, 512])]; + tensor var_2129_end_mask_0 = const()[name = string("op_2129_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2129 = slice_by_index(begin = var_2129_begin_0, end = var_2129_end_0, end_mask = var_2129_end_mask_0, x = causal_mask)[name = string("op_2129")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = var_2129)[name = string("attn_weights_3_cast_fp16")]; + int32 var_2132 = const()[name = string("op_2132"), val = int32(-1)]; + tensor attn_weights_5_cast_fp16 = softmax(axis = var_2132, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_5_cast_fp16, y = x_11_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_2143_perm_0 = const()[name = string("op_2143_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2147 = const()[name = string("op_2147"), val = tensor([1, 1, 1024])]; + tensor var_2143_cast_fp16 = transpose(perm = var_2143_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_171")]; + tensor attn_output_5_cast_fp16 = reshape(shape = var_2147, x = var_2143_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_2152 = const()[name = string("op_2152"), val = tensor([0, 2, 1])]; + string var_2168_pad_type_0 = const()[name = string("op_2168_pad_type_0"), val = string("valid")]; + int32 var_2168_groups_0 = const()[name = string("op_2168_groups_0"), val = int32(1)]; + tensor var_2168_strides_0 = const()[name = string("op_2168_strides_0"), val = tensor([1])]; + tensor var_2168_pad_0 = const()[name = string("op_2168_pad_0"), val = tensor([0, 0])]; + tensor var_2168_dilations_0 = const()[name = string("op_2168_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379729408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380614208))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2153_cast_fp16 = transpose(perm = var_2152, x = attn_output_5_cast_fp16)[name = string("transpose_170")]; + tensor var_2168_cast_fp16 = conv(dilations = var_2168_dilations_0, groups = var_2168_groups_0, pad = var_2168_pad_0, pad_type = var_2168_pad_type_0, strides = var_2168_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_2153_cast_fp16)[name = string("op_2168_cast_fp16")]; + tensor var_2172 = const()[name = string("op_2172"), val = tensor([0, 2, 1])]; + int32 var_2183 = const()[name = string("op_2183"), val = int32(-1)]; + fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_13_cast_fp16 = transpose(perm = var_2172, x = var_2168_cast_fp16)[name = string("transpose_169")]; + tensor var_2185_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2185_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_2183, interleave = input_11_interleave_0, values = (hidden_states_13_cast_fp16, var_2185_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_2180_to_fp16 = const()[name = string("op_2180_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2180_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; + tensor var_2199_to_fp16 = const()[name = string("op_2199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380651136)))]; + tensor attn_output_9_cast_fp16 = mul(x = normed_15_cast_fp16, y = var_2199_to_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = attn_output_9_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + int32 var_2212 = const()[name = string("op_2212"), val = int32(-1)]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2214_cast_fp16 = mul(x = hidden_states_15_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2214_cast_fp16")]; + bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; + tensor input_13_cast_fp16 = concat(axis = var_2212, interleave = input_13_interleave_0, values = (hidden_states_15_cast_fp16, var_2214_cast_fp16))[name = string("input_13_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_2209_to_fp16 = const()[name = string("op_2209_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2209_to_fp16, x = input_13_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; + tensor var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380653504)))]; + tensor x_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = var_2228_to_fp16)[name = string("x_13_cast_fp16")]; + tensor var_2240 = const()[name = string("op_2240"), val = tensor([0, 2, 1])]; + tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; + tensor var_2241_cast_fp16 = transpose(perm = var_2240, x = x_13_cast_fp16)[name = string("transpose_168")]; + tensor input_15_cast_fp16 = expand_dims(axes = input_15_axes_0, x = var_2241_cast_fp16)[name = string("input_15_cast_fp16")]; + string x_15_pad_type_0 = const()[name = string("x_15_pad_type_0"), val = string("valid")]; + tensor x_15_strides_0 = const()[name = string("x_15_strides_0"), val = tensor([1, 1])]; + tensor x_15_pad_0 = const()[name = string("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_15_dilations_0 = const()[name = string("x_15_dilations_0"), val = tensor([1, 1])]; + int32 x_15_groups_0 = const()[name = string("x_15_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380655872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386627904))))[name = string("model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_15_cast_fp16 = conv(dilations = x_15_dilations_0, groups = x_15_groups_0, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = x_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("x_15_cast_fp16")]; + string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; + tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; + tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; + int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386849152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392821184))))[name = string("model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_1_cast_fp16 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("b_1_cast_fp16")]; + string var_2266_mode_0 = const()[name = string("op_2266_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_2266_cast_fp16 = gelu(mode = var_2266_mode_0, x = x_15_cast_fp16)[name = string("op_2266_cast_fp16")]; + tensor input_17_cast_fp16 = mul(x = var_2266_cast_fp16, y = b_1_cast_fp16)[name = string("input_17_cast_fp16")]; + string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; + tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; + tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; + int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393042432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399014464))))[name = string("model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_1_cast_fp16 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_17_cast_fp16)[name = string("e_1_cast_fp16")]; + tensor var_2274_axes_0 = const()[name = string("op_2274_axes_0"), val = tensor([2])]; + tensor var_2274_cast_fp16 = squeeze(axes = var_2274_axes_0, x = e_1_cast_fp16)[name = string("op_2274_cast_fp16")]; + tensor var_2275 = const()[name = string("op_2275"), val = tensor([0, 2, 1])]; + int32 var_2286 = const()[name = string("op_2286"), val = int32(-1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_17_cast_fp16 = transpose(perm = var_2275, x = var_2274_cast_fp16)[name = string("transpose_167")]; + tensor var_2288_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2288_cast_fp16")]; + bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; + tensor input_19_cast_fp16 = concat(axis = var_2286, interleave = input_19_interleave_0, values = (hidden_states_17_cast_fp16, var_2288_cast_fp16))[name = string("input_19_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_2283_to_fp16 = const()[name = string("op_2283_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2283_to_fp16, x = input_19_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_23_cast_fp16 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23_cast_fp16")]; + tensor var_2302_to_fp16 = const()[name = string("op_2302_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399051392)))]; + tensor hidden_states_19_cast_fp16 = mul(x = normed_23_cast_fp16, y = var_2302_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + int32 var_2353 = const()[name = string("op_2353"), val = int32(-1)]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2355_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2355_cast_fp16")]; + bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; + tensor input_21_cast_fp16 = concat(axis = var_2353, interleave = input_21_interleave_0, values = (hidden_states_21_cast_fp16, var_2355_cast_fp16))[name = string("input_21_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_2350_to_fp16 = const()[name = string("op_2350_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2350_to_fp16, x = input_21_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_27_cast_fp16 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27_cast_fp16")]; + tensor var_2369_to_fp16 = const()[name = string("op_2369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399053760)))]; + tensor hidden_states_23_cast_fp16 = mul(x = normed_27_cast_fp16, y = var_2369_to_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor var_2374 = const()[name = string("op_2374"), val = tensor([0, 2, 1])]; + tensor var_2377_axes_0 = const()[name = string("op_2377_axes_0"), val = tensor([2])]; + tensor var_2375_cast_fp16 = transpose(perm = var_2374, x = hidden_states_23_cast_fp16)[name = string("transpose_166")]; + tensor var_2377_cast_fp16 = expand_dims(axes = var_2377_axes_0, x = var_2375_cast_fp16)[name = string("op_2377_cast_fp16")]; + string var_2393_pad_type_0 = const()[name = string("op_2393_pad_type_0"), val = string("valid")]; + tensor var_2393_strides_0 = const()[name = string("op_2393_strides_0"), val = tensor([1, 1])]; + tensor var_2393_pad_0 = const()[name = string("op_2393_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2393_dilations_0 = const()[name = string("op_2393_dilations_0"), val = tensor([1, 1])]; + int32 var_2393_groups_0 = const()[name = string("op_2393_groups_0"), val = int32(1)]; + tensor var_2393 = conv(dilations = var_2393_dilations_0, groups = var_2393_groups_0, pad = var_2393_pad_0, pad_type = var_2393_pad_type_0, strides = var_2393_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2377_cast_fp16)[name = string("op_2393")]; + tensor var_2398 = const()[name = string("op_2398"), val = tensor([1, 4, 1, 256])]; + tensor var_2399 = reshape(shape = var_2398, x = var_2393)[name = string("op_2399")]; + string var_2415_pad_type_0 = const()[name = string("op_2415_pad_type_0"), val = string("valid")]; + tensor var_2415_strides_0 = const()[name = string("op_2415_strides_0"), val = tensor([1, 1])]; + tensor var_2415_pad_0 = const()[name = string("op_2415_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2415_dilations_0 = const()[name = string("op_2415_dilations_0"), val = tensor([1, 1])]; + int32 var_2415_groups_0 = const()[name = string("op_2415_groups_0"), val = int32(1)]; + tensor var_2415 = conv(dilations = var_2415_dilations_0, groups = var_2415_groups_0, pad = var_2415_pad_0, pad_type = var_2415_pad_type_0, strides = var_2415_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2377_cast_fp16)[name = string("op_2415")]; + tensor var_2420 = const()[name = string("op_2420"), val = tensor([1, 1, 1, 256])]; + tensor var_2421 = reshape(shape = var_2420, x = var_2415)[name = string("op_2421")]; + string var_2437_pad_type_0 = const()[name = string("op_2437_pad_type_0"), val = string("valid")]; + tensor var_2437_strides_0 = const()[name = string("op_2437_strides_0"), val = tensor([1, 1])]; + tensor var_2437_pad_0 = const()[name = string("op_2437_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2437_dilations_0 = const()[name = string("op_2437_dilations_0"), val = tensor([1, 1])]; + int32 var_2437_groups_0 = const()[name = string("op_2437_groups_0"), val = int32(1)]; + tensor var_2437 = conv(dilations = var_2437_dilations_0, groups = var_2437_groups_0, pad = var_2437_pad_0, pad_type = var_2437_pad_type_0, strides = var_2437_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2377_cast_fp16)[name = string("op_2437")]; + tensor var_2442 = const()[name = string("op_2442"), val = tensor([1, 1, 1, 256])]; + tensor var_2443 = reshape(shape = var_2442, x = var_2437)[name = string("op_2443")]; + int32 var_2458 = const()[name = string("op_2458"), val = int32(-1)]; + fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; + tensor var_2460 = mul(x = var_2399, y = const_42_promoted)[name = string("op_2460")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25 = concat(axis = var_2458, interleave = input_25_interleave_0, values = (var_2399, var_2460))[name = string("input_25")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2455_to_fp16, x = input_25)[name = string("normed_29_cast_fp16")]; + tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_31 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31")]; + tensor var_2474_to_fp16 = const()[name = string("op_2474_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399056128)))]; + tensor q_3_cast_fp16 = mul(x = normed_31, y = var_2474_to_fp16)[name = string("q_3_cast_fp16")]; + int32 var_2485 = const()[name = string("op_2485"), val = int32(-1)]; + fp16 const_46_promoted = const()[name = string("const_46_promoted"), val = fp16(-0x1p+0)]; + tensor var_2487 = mul(x = var_2421, y = const_46_promoted)[name = string("op_2487")]; + bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; + tensor input_27 = concat(axis = var_2485, interleave = input_27_interleave_0, values = (var_2421, var_2487))[name = string("input_27")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_2482_to_fp16 = const()[name = string("op_2482_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2482_to_fp16, x = input_27)[name = string("normed_33_cast_fp16")]; + tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_35 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35")]; + tensor var_2501_to_fp16 = const()[name = string("op_2501_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399056704)))]; + tensor k_3_cast_fp16 = mul(x = normed_35, y = var_2501_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_2503_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2503_cast_fp16")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; + fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2524_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2524_cast_fp16")]; + int32 var_2526 = const()[name = string("op_2526"), val = int32(-1)]; + bool var_2527_interleave_0 = const()[name = string("op_2527_interleave_0"), val = bool(false)]; + tensor var_2527_cast_fp16 = concat(axis = var_2526, interleave = var_2527_interleave_0, values = (var_2524_cast_fp16, x1_5_cast_fp16))[name = string("op_2527_cast_fp16")]; + tensor var_2528_cast_fp16 = mul(x = var_2527_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2528_cast_fp16")]; + tensor query_states_5_cast_fp16 = add(x = var_2503_cast_fp16, y = var_2528_cast_fp16)[name = string("query_states_5_cast_fp16")]; + tensor var_2531_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2531_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; + fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2552_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_2552_cast_fp16")]; + int32 var_2554 = const()[name = string("op_2554"), val = int32(-1)]; + bool var_2555_interleave_0 = const()[name = string("op_2555_interleave_0"), val = bool(false)]; + tensor var_2555_cast_fp16 = concat(axis = var_2554, interleave = var_2555_interleave_0, values = (var_2552_cast_fp16, x1_7_cast_fp16))[name = string("op_2555_cast_fp16")]; + tensor var_2556_cast_fp16 = mul(x = var_2555_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2556_cast_fp16")]; + tensor key_states_5_cast_fp16 = add(x = var_2531_cast_fp16, y = var_2556_cast_fp16)[name = string("key_states_5_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; + tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_1955, concat_11_values3_0))[name = string("concat_11")]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_3_stride_0, update = key_states_5_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_2")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([23])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([24])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_1955, concat_15_values3_0))[name = string("concat_15")]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_4_stride_0, update = var_2443, x = coreml_update_state_54)[name = string("model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_3")]; + tensor var_2611_begin_0 = const()[name = string("op_2611_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_2611_end_0 = const()[name = string("op_2611_end_0"), val = tensor([2, 1, 512, 256])]; + tensor var_2611_end_mask_0 = const()[name = string("op_2611_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2611_cast_fp16 = slice_by_index(begin = var_2611_begin_0, end = var_2611_end_0, end_mask = var_2611_end_mask_0, x = coreml_update_state_55)[name = string("op_2611_cast_fp16")]; + tensor var_2618_begin_0 = const()[name = string("op_2618_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_2618_end_0 = const()[name = string("op_2618_end_0"), val = tensor([24, 1, 512, 256])]; + tensor var_2618_end_mask_0 = const()[name = string("op_2618_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2618_cast_fp16 = slice_by_index(begin = var_2618_begin_0, end = var_2618_end_0, end_mask = var_2618_end_mask_0, x = coreml_update_state_55)[name = string("op_2618_cast_fp16")]; + tensor var_2655 = const()[name = string("op_2655"), val = tensor([1, 4, 1, 1])]; + tensor x_21_cast_fp16 = tile(reps = var_2655, x = var_2611_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor var_2675 = const()[name = string("op_2675"), val = tensor([1, 4, 1, 1])]; + tensor x_27_cast_fp16 = tile(reps = var_2675, x = var_2618_cast_fp16)[name = string("x_27_cast_fp16")]; + bool var_2702_transpose_x_1 = const()[name = string("op_2702_transpose_x_1"), val = bool(false)]; + bool var_2702_transpose_y_1 = const()[name = string("op_2702_transpose_y_1"), val = bool(true)]; + tensor var_2702 = matmul(transpose_x = var_2702_transpose_x_1, transpose_y = var_2702_transpose_y_1, x = query_states_5_cast_fp16, y = x_21_cast_fp16)[name = string("op_2702")]; + fp16 var_2703_to_fp16 = const()[name = string("op_2703_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_7_cast_fp16 = mul(x = var_2702, y = var_2703_to_fp16)[name = string("attn_weights_7_cast_fp16")]; + tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = var_2129)[name = string("attn_weights_9_cast_fp16")]; + int32 var_2738 = const()[name = string("op_2738"), val = int32(-1)]; + tensor attn_weights_11_cast_fp16 = softmax(axis = var_2738, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_11_transpose_x_0 = const()[name = string("attn_output_11_transpose_x_0"), val = bool(false)]; + bool attn_output_11_transpose_y_0 = const()[name = string("attn_output_11_transpose_y_0"), val = bool(false)]; + tensor attn_output_11_cast_fp16 = matmul(transpose_x = attn_output_11_transpose_x_0, transpose_y = attn_output_11_transpose_y_0, x = attn_weights_11_cast_fp16, y = x_27_cast_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor var_2749_perm_0 = const()[name = string("op_2749_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2753 = const()[name = string("op_2753"), val = tensor([1, 1, 1024])]; + tensor var_2749_cast_fp16 = transpose(perm = var_2749_perm_0, x = attn_output_11_cast_fp16)[name = string("transpose_165")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2753, x = var_2749_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2758 = const()[name = string("op_2758"), val = tensor([0, 2, 1])]; + string var_2774_pad_type_0 = const()[name = string("op_2774_pad_type_0"), val = string("valid")]; + int32 var_2774_groups_0 = const()[name = string("op_2774_groups_0"), val = int32(1)]; + tensor var_2774_strides_0 = const()[name = string("op_2774_strides_0"), val = tensor([1])]; + tensor var_2774_pad_0 = const()[name = string("op_2774_pad_0"), val = tensor([0, 0])]; + tensor var_2774_dilations_0 = const()[name = string("op_2774_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399057280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399942080))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2759_cast_fp16 = transpose(perm = var_2758, x = attn_output_15_cast_fp16)[name = string("transpose_164")]; + tensor var_2774_cast_fp16 = conv(dilations = var_2774_dilations_0, groups = var_2774_groups_0, pad = var_2774_pad_0, pad_type = var_2774_pad_type_0, strides = var_2774_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2759_cast_fp16)[name = string("op_2774_cast_fp16")]; + tensor var_2778 = const()[name = string("op_2778"), val = tensor([0, 2, 1])]; + int32 var_2789 = const()[name = string("op_2789"), val = int32(-1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_29_cast_fp16 = transpose(perm = var_2778, x = var_2774_cast_fp16)[name = string("transpose_163")]; + tensor var_2791_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_2791_cast_fp16")]; + bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; + tensor input_31_cast_fp16 = concat(axis = var_2789, interleave = input_31_interleave_0, values = (hidden_states_29_cast_fp16, var_2791_cast_fp16))[name = string("input_31_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_2786_to_fp16 = const()[name = string("op_2786_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2786_to_fp16, x = input_31_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_39_cast_fp16 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39_cast_fp16")]; + tensor var_2805_to_fp16 = const()[name = string("op_2805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399979008)))]; + tensor attn_output_19_cast_fp16 = mul(x = normed_39_cast_fp16, y = var_2805_to_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + int32 var_2818 = const()[name = string("op_2818"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2820_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_2820_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_2818, interleave = input_33_interleave_0, values = (hidden_states_31_cast_fp16, var_2820_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_2815_to_fp16 = const()[name = string("op_2815_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2815_to_fp16, x = input_33_cast_fp16)[name = string("normed_41_cast_fp16")]; + tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_43_cast_fp16 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43_cast_fp16")]; + tensor var_2834_to_fp16 = const()[name = string("op_2834_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399981376)))]; + tensor x_29_cast_fp16 = mul(x = normed_43_cast_fp16, y = var_2834_to_fp16)[name = string("x_29_cast_fp16")]; + tensor var_2846 = const()[name = string("op_2846"), val = tensor([0, 2, 1])]; + tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; + tensor var_2847_cast_fp16 = transpose(perm = var_2846, x = x_29_cast_fp16)[name = string("transpose_162")]; + tensor input_35_cast_fp16 = expand_dims(axes = input_35_axes_0, x = var_2847_cast_fp16)[name = string("input_35_cast_fp16")]; + string x_31_pad_type_0 = const()[name = string("x_31_pad_type_0"), val = string("valid")]; + tensor x_31_strides_0 = const()[name = string("x_31_strides_0"), val = tensor([1, 1])]; + tensor x_31_pad_0 = const()[name = string("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_31_dilations_0 = const()[name = string("x_31_dilations_0"), val = tensor([1, 1])]; + int32 x_31_groups_0 = const()[name = string("x_31_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399983744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405955776))))[name = string("model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("x_31_cast_fp16")]; + string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; + tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; + tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; + int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406177024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412149056))))[name = string("model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_3_cast_fp16 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("b_3_cast_fp16")]; + string var_2872_mode_0 = const()[name = string("op_2872_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_2872_cast_fp16 = gelu(mode = var_2872_mode_0, x = x_31_cast_fp16)[name = string("op_2872_cast_fp16")]; + tensor input_37_cast_fp16 = mul(x = var_2872_cast_fp16, y = b_3_cast_fp16)[name = string("input_37_cast_fp16")]; + string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; + tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; + tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; + int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412370304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418342336))))[name = string("model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_3_cast_fp16 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_37_cast_fp16)[name = string("e_3_cast_fp16")]; + tensor var_2880_axes_0 = const()[name = string("op_2880_axes_0"), val = tensor([2])]; + tensor var_2880_cast_fp16 = squeeze(axes = var_2880_axes_0, x = e_3_cast_fp16)[name = string("op_2880_cast_fp16")]; + tensor var_2881 = const()[name = string("op_2881"), val = tensor([0, 2, 1])]; + int32 var_2892 = const()[name = string("op_2892"), val = int32(-1)]; + fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_33_cast_fp16 = transpose(perm = var_2881, x = var_2880_cast_fp16)[name = string("transpose_161")]; + tensor var_2894_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_2894_cast_fp16")]; + bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; + tensor input_39_cast_fp16 = concat(axis = var_2892, interleave = input_39_interleave_0, values = (hidden_states_33_cast_fp16, var_2894_cast_fp16))[name = string("input_39_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_2889_to_fp16 = const()[name = string("op_2889_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_2889_to_fp16, x = input_39_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; + tensor var_2908_to_fp16 = const()[name = string("op_2908_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418379264)))]; + tensor hidden_states_35_cast_fp16 = mul(x = normed_47_cast_fp16, y = var_2908_to_fp16)[name = string("hidden_states_35_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + int32 var_2959 = const()[name = string("op_2959"), val = int32(-1)]; + fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2961_cast_fp16 = mul(x = hidden_states_37_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_2961_cast_fp16")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41_cast_fp16 = concat(axis = var_2959, interleave = input_41_interleave_0, values = (hidden_states_37_cast_fp16, var_2961_cast_fp16))[name = string("input_41_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_2956_to_fp16 = const()[name = string("op_2956_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_2956_to_fp16, x = input_41_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; + tensor var_2975_to_fp16 = const()[name = string("op_2975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418381632)))]; + tensor hidden_states_39_cast_fp16 = mul(x = normed_51_cast_fp16, y = var_2975_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor var_2980 = const()[name = string("op_2980"), val = tensor([0, 2, 1])]; + tensor var_2983_axes_0 = const()[name = string("op_2983_axes_0"), val = tensor([2])]; + tensor var_2981_cast_fp16 = transpose(perm = var_2980, x = hidden_states_39_cast_fp16)[name = string("transpose_160")]; + tensor var_2983_cast_fp16 = expand_dims(axes = var_2983_axes_0, x = var_2981_cast_fp16)[name = string("op_2983_cast_fp16")]; + string var_2999_pad_type_0 = const()[name = string("op_2999_pad_type_0"), val = string("valid")]; + tensor var_2999_strides_0 = const()[name = string("op_2999_strides_0"), val = tensor([1, 1])]; + tensor var_2999_pad_0 = const()[name = string("op_2999_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2999_dilations_0 = const()[name = string("op_2999_dilations_0"), val = tensor([1, 1])]; + int32 var_2999_groups_0 = const()[name = string("op_2999_groups_0"), val = int32(1)]; + tensor var_2999 = conv(dilations = var_2999_dilations_0, groups = var_2999_groups_0, pad = var_2999_pad_0, pad_type = var_2999_pad_type_0, strides = var_2999_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2983_cast_fp16)[name = string("op_2999")]; + tensor var_3004 = const()[name = string("op_3004"), val = tensor([1, 4, 1, 256])]; + tensor var_3005 = reshape(shape = var_3004, x = var_2999)[name = string("op_3005")]; + string var_3021_pad_type_0 = const()[name = string("op_3021_pad_type_0"), val = string("valid")]; + tensor var_3021_strides_0 = const()[name = string("op_3021_strides_0"), val = tensor([1, 1])]; + tensor var_3021_pad_0 = const()[name = string("op_3021_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3021_dilations_0 = const()[name = string("op_3021_dilations_0"), val = tensor([1, 1])]; + int32 var_3021_groups_0 = const()[name = string("op_3021_groups_0"), val = int32(1)]; + tensor var_3021 = conv(dilations = var_3021_dilations_0, groups = var_3021_groups_0, pad = var_3021_pad_0, pad_type = var_3021_pad_type_0, strides = var_3021_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2983_cast_fp16)[name = string("op_3021")]; + tensor var_3026 = const()[name = string("op_3026"), val = tensor([1, 1, 1, 256])]; + tensor var_3027 = reshape(shape = var_3026, x = var_3021)[name = string("op_3027")]; + string var_3043_pad_type_0 = const()[name = string("op_3043_pad_type_0"), val = string("valid")]; + tensor var_3043_strides_0 = const()[name = string("op_3043_strides_0"), val = tensor([1, 1])]; + tensor var_3043_pad_0 = const()[name = string("op_3043_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3043_dilations_0 = const()[name = string("op_3043_dilations_0"), val = tensor([1, 1])]; + int32 var_3043_groups_0 = const()[name = string("op_3043_groups_0"), val = int32(1)]; + tensor var_3043 = conv(dilations = var_3043_dilations_0, groups = var_3043_groups_0, pad = var_3043_pad_0, pad_type = var_3043_pad_type_0, strides = var_3043_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2983_cast_fp16)[name = string("op_3043")]; + tensor var_3048 = const()[name = string("op_3048"), val = tensor([1, 1, 1, 256])]; + tensor var_3049 = reshape(shape = var_3048, x = var_3043)[name = string("op_3049")]; + int32 var_3064 = const()[name = string("op_3064"), val = int32(-1)]; + fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; + tensor var_3066 = mul(x = var_3005, y = const_80_promoted)[name = string("op_3066")]; + bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; + tensor input_45 = concat(axis = var_3064, interleave = input_45_interleave_0, values = (var_3005, var_3066))[name = string("input_45")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_3061_to_fp16 = const()[name = string("op_3061_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3061_to_fp16, x = input_45)[name = string("normed_53_cast_fp16")]; + tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; + tensor var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418384000)))]; + tensor q_5_cast_fp16 = mul(x = normed_55, y = var_3080_to_fp16)[name = string("q_5_cast_fp16")]; + int32 var_3091 = const()[name = string("op_3091"), val = int32(-1)]; + fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; + tensor var_3093 = mul(x = var_3027, y = const_84_promoted)[name = string("op_3093")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47 = concat(axis = var_3091, interleave = input_47_interleave_0, values = (var_3027, var_3093))[name = string("input_47")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_3088_to_fp16 = const()[name = string("op_3088_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3088_to_fp16, x = input_47)[name = string("normed_57_cast_fp16")]; + tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; + tensor var_3107_to_fp16 = const()[name = string("op_3107_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418384576)))]; + tensor k_5_cast_fp16 = mul(x = normed_59, y = var_3107_to_fp16)[name = string("k_5_cast_fp16")]; + tensor var_3109_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3109_cast_fp16")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; + fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3130_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_3130_cast_fp16")]; + int32 var_3132 = const()[name = string("op_3132"), val = int32(-1)]; + bool var_3133_interleave_0 = const()[name = string("op_3133_interleave_0"), val = bool(false)]; + tensor var_3133_cast_fp16 = concat(axis = var_3132, interleave = var_3133_interleave_0, values = (var_3130_cast_fp16, x1_9_cast_fp16))[name = string("op_3133_cast_fp16")]; + tensor var_3134_cast_fp16 = mul(x = var_3133_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3134_cast_fp16")]; + tensor query_states_9_cast_fp16 = add(x = var_3109_cast_fp16, y = var_3134_cast_fp16)[name = string("query_states_9_cast_fp16")]; + tensor var_3137_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3137_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; + fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3158_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_3158_cast_fp16")]; + int32 var_3160 = const()[name = string("op_3160"), val = int32(-1)]; + bool var_3161_interleave_0 = const()[name = string("op_3161_interleave_0"), val = bool(false)]; + tensor var_3161_cast_fp16 = concat(axis = var_3160, interleave = var_3161_interleave_0, values = (var_3158_cast_fp16, x1_11_cast_fp16))[name = string("op_3161_cast_fp16")]; + tensor var_3162_cast_fp16 = mul(x = var_3161_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3162_cast_fp16")]; + tensor key_states_9_cast_fp16 = add(x = var_3137_cast_fp16, y = var_3162_cast_fp16)[name = string("key_states_9_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; + tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_1955, concat_19_values3_0))[name = string("concat_19")]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_5_stride_0, update = key_states_9_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_4")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([24])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([25])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_1955, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_6_stride_0, update = var_3049, x = coreml_update_state_56)[name = string("model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_5")]; + tensor var_3217_begin_0 = const()[name = string("op_3217_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_3217_end_0 = const()[name = string("op_3217_end_0"), val = tensor([3, 1, 512, 256])]; + tensor var_3217_end_mask_0 = const()[name = string("op_3217_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3217_cast_fp16 = slice_by_index(begin = var_3217_begin_0, end = var_3217_end_0, end_mask = var_3217_end_mask_0, x = coreml_update_state_57)[name = string("op_3217_cast_fp16")]; + tensor var_3224_begin_0 = const()[name = string("op_3224_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor var_3224_end_0 = const()[name = string("op_3224_end_0"), val = tensor([25, 1, 512, 256])]; + tensor var_3224_end_mask_0 = const()[name = string("op_3224_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3224_cast_fp16 = slice_by_index(begin = var_3224_begin_0, end = var_3224_end_0, end_mask = var_3224_end_mask_0, x = coreml_update_state_57)[name = string("op_3224_cast_fp16")]; + tensor var_3261 = const()[name = string("op_3261"), val = tensor([1, 4, 1, 1])]; + tensor x_37_cast_fp16 = tile(reps = var_3261, x = var_3217_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_3281 = const()[name = string("op_3281"), val = tensor([1, 4, 1, 1])]; + tensor x_43_cast_fp16 = tile(reps = var_3281, x = var_3224_cast_fp16)[name = string("x_43_cast_fp16")]; + bool var_3308_transpose_x_1 = const()[name = string("op_3308_transpose_x_1"), val = bool(false)]; + bool var_3308_transpose_y_1 = const()[name = string("op_3308_transpose_y_1"), val = bool(true)]; + tensor var_3308 = matmul(transpose_x = var_3308_transpose_x_1, transpose_y = var_3308_transpose_y_1, x = query_states_9_cast_fp16, y = x_37_cast_fp16)[name = string("op_3308")]; + fp16 var_3309_to_fp16 = const()[name = string("op_3309_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_3308, y = var_3309_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = var_2129)[name = string("attn_weights_15_cast_fp16")]; + int32 var_3344 = const()[name = string("op_3344"), val = int32(-1)]; + tensor attn_weights_17_cast_fp16 = softmax(axis = var_3344, x = attn_weights_15_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; + bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = attn_weights_17_cast_fp16, y = x_43_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_3355_perm_0 = const()[name = string("op_3355_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3359 = const()[name = string("op_3359"), val = tensor([1, 1, 1024])]; + tensor var_3355_cast_fp16 = transpose(perm = var_3355_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_159")]; + tensor attn_output_25_cast_fp16 = reshape(shape = var_3359, x = var_3355_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3364 = const()[name = string("op_3364"), val = tensor([0, 2, 1])]; + string var_3380_pad_type_0 = const()[name = string("op_3380_pad_type_0"), val = string("valid")]; + int32 var_3380_groups_0 = const()[name = string("op_3380_groups_0"), val = int32(1)]; + tensor var_3380_strides_0 = const()[name = string("op_3380_strides_0"), val = tensor([1])]; + tensor var_3380_pad_0 = const()[name = string("op_3380_pad_0"), val = tensor([0, 0])]; + tensor var_3380_dilations_0 = const()[name = string("op_3380_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418385152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419269952))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3365_cast_fp16 = transpose(perm = var_3364, x = attn_output_25_cast_fp16)[name = string("transpose_158")]; + tensor var_3380_cast_fp16 = conv(dilations = var_3380_dilations_0, groups = var_3380_groups_0, pad = var_3380_pad_0, pad_type = var_3380_pad_type_0, strides = var_3380_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3365_cast_fp16)[name = string("op_3380_cast_fp16")]; + tensor var_3384 = const()[name = string("op_3384"), val = tensor([0, 2, 1])]; + int32 var_3395 = const()[name = string("op_3395"), val = int32(-1)]; + fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_45_cast_fp16 = transpose(perm = var_3384, x = var_3380_cast_fp16)[name = string("transpose_157")]; + tensor var_3397_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_3397_cast_fp16")]; + bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; + tensor input_51_cast_fp16 = concat(axis = var_3395, interleave = input_51_interleave_0, values = (hidden_states_45_cast_fp16, var_3397_cast_fp16))[name = string("input_51_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_3392_to_fp16 = const()[name = string("op_3392_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3392_to_fp16, x = input_51_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; + tensor var_3411_to_fp16 = const()[name = string("op_3411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419306880)))]; + tensor attn_output_29_cast_fp16 = mul(x = normed_63_cast_fp16, y = var_3411_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; + int32 var_3424 = const()[name = string("op_3424"), val = int32(-1)]; + fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3426_cast_fp16 = mul(x = hidden_states_47_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_3426_cast_fp16")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53_cast_fp16 = concat(axis = var_3424, interleave = input_53_interleave_0, values = (hidden_states_47_cast_fp16, var_3426_cast_fp16))[name = string("input_53_cast_fp16")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_3421_to_fp16 = const()[name = string("op_3421_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3421_to_fp16, x = input_53_cast_fp16)[name = string("normed_65_cast_fp16")]; + tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; + tensor var_3440_to_fp16 = const()[name = string("op_3440_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419309248)))]; + tensor x_45_cast_fp16 = mul(x = normed_67_cast_fp16, y = var_3440_to_fp16)[name = string("x_45_cast_fp16")]; + tensor var_3452 = const()[name = string("op_3452"), val = tensor([0, 2, 1])]; + tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; + tensor var_3453_cast_fp16 = transpose(perm = var_3452, x = x_45_cast_fp16)[name = string("transpose_156")]; + tensor input_55_cast_fp16 = expand_dims(axes = input_55_axes_0, x = var_3453_cast_fp16)[name = string("input_55_cast_fp16")]; + string x_47_pad_type_0 = const()[name = string("x_47_pad_type_0"), val = string("valid")]; + tensor x_47_strides_0 = const()[name = string("x_47_strides_0"), val = tensor([1, 1])]; + tensor x_47_pad_0 = const()[name = string("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_47_dilations_0 = const()[name = string("x_47_dilations_0"), val = tensor([1, 1])]; + int32 x_47_groups_0 = const()[name = string("x_47_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419311616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425283648))))[name = string("model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("x_47_cast_fp16")]; + string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; + tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; + tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; + int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425504896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431476928))))[name = string("model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_5_cast_fp16 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("b_5_cast_fp16")]; + string var_3478_mode_0 = const()[name = string("op_3478_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3478_cast_fp16 = gelu(mode = var_3478_mode_0, x = x_47_cast_fp16)[name = string("op_3478_cast_fp16")]; + tensor input_57_cast_fp16 = mul(x = var_3478_cast_fp16, y = b_5_cast_fp16)[name = string("input_57_cast_fp16")]; + string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; + tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; + tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; + int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431698176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437670208))))[name = string("model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_5_cast_fp16 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("e_5_cast_fp16")]; + tensor var_3486_axes_0 = const()[name = string("op_3486_axes_0"), val = tensor([2])]; + tensor var_3486_cast_fp16 = squeeze(axes = var_3486_axes_0, x = e_5_cast_fp16)[name = string("op_3486_cast_fp16")]; + tensor var_3487 = const()[name = string("op_3487"), val = tensor([0, 2, 1])]; + int32 var_3498 = const()[name = string("op_3498"), val = int32(-1)]; + fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_49_cast_fp16 = transpose(perm = var_3487, x = var_3486_cast_fp16)[name = string("transpose_155")]; + tensor var_3500_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_110_promoted_to_fp16)[name = string("op_3500_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_3498, interleave = input_59_interleave_0, values = (hidden_states_49_cast_fp16, var_3500_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_3495_to_fp16 = const()[name = string("op_3495_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3495_to_fp16, x = input_59_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_71_cast_fp16 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71_cast_fp16")]; + tensor var_3514_to_fp16 = const()[name = string("op_3514_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437707136)))]; + tensor hidden_states_51_cast_fp16 = mul(x = normed_71_cast_fp16, y = var_3514_to_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + int32 var_3565 = const()[name = string("op_3565"), val = int32(-1)]; + fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3567_cast_fp16 = mul(x = hidden_states_53_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3567_cast_fp16")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61_cast_fp16 = concat(axis = var_3565, interleave = input_61_interleave_0, values = (hidden_states_53_cast_fp16, var_3567_cast_fp16))[name = string("input_61_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3562_to_fp16, x = input_61_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_75_cast_fp16 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75_cast_fp16")]; + tensor var_3581_to_fp16 = const()[name = string("op_3581_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437709504)))]; + tensor hidden_states_55_cast_fp16 = mul(x = normed_75_cast_fp16, y = var_3581_to_fp16)[name = string("hidden_states_55_cast_fp16")]; + tensor var_3586 = const()[name = string("op_3586"), val = tensor([0, 2, 1])]; + tensor var_3589_axes_0 = const()[name = string("op_3589_axes_0"), val = tensor([2])]; + tensor var_3587_cast_fp16 = transpose(perm = var_3586, x = hidden_states_55_cast_fp16)[name = string("transpose_154")]; + tensor var_3589_cast_fp16 = expand_dims(axes = var_3589_axes_0, x = var_3587_cast_fp16)[name = string("op_3589_cast_fp16")]; + string var_3605_pad_type_0 = const()[name = string("op_3605_pad_type_0"), val = string("valid")]; + tensor var_3605_strides_0 = const()[name = string("op_3605_strides_0"), val = tensor([1, 1])]; + tensor var_3605_pad_0 = const()[name = string("op_3605_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3605_dilations_0 = const()[name = string("op_3605_dilations_0"), val = tensor([1, 1])]; + int32 var_3605_groups_0 = const()[name = string("op_3605_groups_0"), val = int32(1)]; + tensor var_3605 = conv(dilations = var_3605_dilations_0, groups = var_3605_groups_0, pad = var_3605_pad_0, pad_type = var_3605_pad_type_0, strides = var_3605_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3589_cast_fp16)[name = string("op_3605")]; + tensor var_3610 = const()[name = string("op_3610"), val = tensor([1, 4, 1, 256])]; + tensor var_3611 = reshape(shape = var_3610, x = var_3605)[name = string("op_3611")]; + string var_3627_pad_type_0 = const()[name = string("op_3627_pad_type_0"), val = string("valid")]; + tensor var_3627_strides_0 = const()[name = string("op_3627_strides_0"), val = tensor([1, 1])]; + tensor var_3627_pad_0 = const()[name = string("op_3627_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3627_dilations_0 = const()[name = string("op_3627_dilations_0"), val = tensor([1, 1])]; + int32 var_3627_groups_0 = const()[name = string("op_3627_groups_0"), val = int32(1)]; + tensor var_3627 = conv(dilations = var_3627_dilations_0, groups = var_3627_groups_0, pad = var_3627_pad_0, pad_type = var_3627_pad_type_0, strides = var_3627_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3589_cast_fp16)[name = string("op_3627")]; + tensor var_3632 = const()[name = string("op_3632"), val = tensor([1, 1, 1, 256])]; + tensor var_3633 = reshape(shape = var_3632, x = var_3627)[name = string("op_3633")]; + string var_3649_pad_type_0 = const()[name = string("op_3649_pad_type_0"), val = string("valid")]; + tensor var_3649_strides_0 = const()[name = string("op_3649_strides_0"), val = tensor([1, 1])]; + tensor var_3649_pad_0 = const()[name = string("op_3649_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3649_dilations_0 = const()[name = string("op_3649_dilations_0"), val = tensor([1, 1])]; + int32 var_3649_groups_0 = const()[name = string("op_3649_groups_0"), val = int32(1)]; + tensor var_3649 = conv(dilations = var_3649_dilations_0, groups = var_3649_groups_0, pad = var_3649_pad_0, pad_type = var_3649_pad_type_0, strides = var_3649_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3589_cast_fp16)[name = string("op_3649")]; + tensor var_3654 = const()[name = string("op_3654"), val = tensor([1, 1, 1, 256])]; + tensor var_3655 = reshape(shape = var_3654, x = var_3649)[name = string("op_3655")]; + int32 var_3670 = const()[name = string("op_3670"), val = int32(-1)]; + fp16 const_118_promoted = const()[name = string("const_118_promoted"), val = fp16(-0x1p+0)]; + tensor var_3672 = mul(x = var_3611, y = const_118_promoted)[name = string("op_3672")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_3670, interleave = input_65_interleave_0, values = (var_3611, var_3672))[name = string("input_65")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_3667_to_fp16 = const()[name = string("op_3667_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_3667_to_fp16, x = input_65)[name = string("normed_77_cast_fp16")]; + tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_79 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79")]; + tensor var_3686_to_fp16 = const()[name = string("op_3686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437711872)))]; + tensor q_7_cast_fp16 = mul(x = normed_79, y = var_3686_to_fp16)[name = string("q_7_cast_fp16")]; + int32 var_3697 = const()[name = string("op_3697"), val = int32(-1)]; + fp16 const_122_promoted = const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; + tensor var_3699 = mul(x = var_3633, y = const_122_promoted)[name = string("op_3699")]; + bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; + tensor input_67 = concat(axis = var_3697, interleave = input_67_interleave_0, values = (var_3633, var_3699))[name = string("input_67")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_3694_to_fp16 = const()[name = string("op_3694_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_3694_to_fp16, x = input_67)[name = string("normed_81_cast_fp16")]; + tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_83 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83")]; + tensor var_3713_to_fp16 = const()[name = string("op_3713_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437712448)))]; + tensor k_7_cast_fp16 = mul(x = normed_83, y = var_3713_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_3715_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3715_cast_fp16")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; + fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3736_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_3736_cast_fp16")]; + int32 var_3738 = const()[name = string("op_3738"), val = int32(-1)]; + bool var_3739_interleave_0 = const()[name = string("op_3739_interleave_0"), val = bool(false)]; + tensor var_3739_cast_fp16 = concat(axis = var_3738, interleave = var_3739_interleave_0, values = (var_3736_cast_fp16, x1_13_cast_fp16))[name = string("op_3739_cast_fp16")]; + tensor var_3740_cast_fp16 = mul(x = var_3739_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3740_cast_fp16")]; + tensor query_states_13_cast_fp16 = add(x = var_3715_cast_fp16, y = var_3740_cast_fp16)[name = string("query_states_13_cast_fp16")]; + tensor var_3743_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3743_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; + fp16 const_131_promoted_to_fp16 = const()[name = string("const_131_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3764_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_131_promoted_to_fp16)[name = string("op_3764_cast_fp16")]; + int32 var_3766 = const()[name = string("op_3766"), val = int32(-1)]; + bool var_3767_interleave_0 = const()[name = string("op_3767_interleave_0"), val = bool(false)]; + tensor var_3767_cast_fp16 = concat(axis = var_3766, interleave = var_3767_interleave_0, values = (var_3764_cast_fp16, x1_15_cast_fp16))[name = string("op_3767_cast_fp16")]; + tensor var_3768_cast_fp16 = mul(x = var_3767_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3768_cast_fp16")]; + tensor key_states_13_cast_fp16 = add(x = var_3743_cast_fp16, y = var_3768_cast_fp16)[name = string("key_states_13_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_1955, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_7_stride_0, update = key_states_13_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_6")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([25])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([26])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; + tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; + tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; + int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; + bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; + tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_1955, concat_31_values3_0))[name = string("concat_31")]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_8_stride_0, update = var_3655, x = coreml_update_state_58)[name = string("model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_7")]; + tensor var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_3823_end_0 = const()[name = string("op_3823_end_0"), val = tensor([4, 1, 512, 256])]; + tensor var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = var_3823_end_0, end_mask = var_3823_end_mask_0, x = coreml_update_state_59)[name = string("op_3823_cast_fp16")]; + tensor var_3830_begin_0 = const()[name = string("op_3830_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor var_3830_end_0 = const()[name = string("op_3830_end_0"), val = tensor([26, 1, 512, 256])]; + tensor var_3830_end_mask_0 = const()[name = string("op_3830_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3830_cast_fp16 = slice_by_index(begin = var_3830_begin_0, end = var_3830_end_0, end_mask = var_3830_end_mask_0, x = coreml_update_state_59)[name = string("op_3830_cast_fp16")]; + tensor var_3867 = const()[name = string("op_3867"), val = tensor([1, 4, 1, 1])]; + tensor x_53_cast_fp16 = tile(reps = var_3867, x = var_3823_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_3887 = const()[name = string("op_3887"), val = tensor([1, 4, 1, 1])]; + tensor x_59_cast_fp16 = tile(reps = var_3887, x = var_3830_cast_fp16)[name = string("x_59_cast_fp16")]; + bool var_3914_transpose_x_1 = const()[name = string("op_3914_transpose_x_1"), val = bool(false)]; + bool var_3914_transpose_y_1 = const()[name = string("op_3914_transpose_y_1"), val = bool(true)]; + tensor var_3914 = matmul(transpose_x = var_3914_transpose_x_1, transpose_y = var_3914_transpose_y_1, x = query_states_13_cast_fp16, y = x_53_cast_fp16)[name = string("op_3914")]; + fp16 var_3915_to_fp16 = const()[name = string("op_3915_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_19_cast_fp16 = mul(x = var_3914, y = var_3915_to_fp16)[name = string("attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = var_2129)[name = string("attn_weights_21_cast_fp16")]; + int32 var_3950 = const()[name = string("op_3950"), val = int32(-1)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_3950, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = x_59_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_3961_perm_0 = const()[name = string("op_3961_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3965 = const()[name = string("op_3965"), val = tensor([1, 1, 1024])]; + tensor var_3961_cast_fp16 = transpose(perm = var_3961_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_153")]; + tensor attn_output_35_cast_fp16 = reshape(shape = var_3965, x = var_3961_cast_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor var_3970 = const()[name = string("op_3970"), val = tensor([0, 2, 1])]; + string var_3986_pad_type_0 = const()[name = string("op_3986_pad_type_0"), val = string("valid")]; + int32 var_3986_groups_0 = const()[name = string("op_3986_groups_0"), val = int32(1)]; + tensor var_3986_strides_0 = const()[name = string("op_3986_strides_0"), val = tensor([1])]; + tensor var_3986_pad_0 = const()[name = string("op_3986_pad_0"), val = tensor([0, 0])]; + tensor var_3986_dilations_0 = const()[name = string("op_3986_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437713024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438597824))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3971_cast_fp16 = transpose(perm = var_3970, x = attn_output_35_cast_fp16)[name = string("transpose_152")]; + tensor var_3986_cast_fp16 = conv(dilations = var_3986_dilations_0, groups = var_3986_groups_0, pad = var_3986_pad_0, pad_type = var_3986_pad_type_0, strides = var_3986_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3971_cast_fp16)[name = string("op_3986_cast_fp16")]; + tensor var_3990 = const()[name = string("op_3990"), val = tensor([0, 2, 1])]; + int32 var_4001 = const()[name = string("op_4001"), val = int32(-1)]; + fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_61_cast_fp16 = transpose(perm = var_3990, x = var_3986_cast_fp16)[name = string("transpose_151")]; + tensor var_4003_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_4003_cast_fp16")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_4001, interleave = input_71_interleave_0, values = (hidden_states_61_cast_fp16, var_4003_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_3998_to_fp16 = const()[name = string("op_3998_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_3998_to_fp16, x = input_71_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_87_cast_fp16 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87_cast_fp16")]; + tensor var_4017_to_fp16 = const()[name = string("op_4017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438634752)))]; + tensor attn_output_39_cast_fp16 = mul(x = normed_87_cast_fp16, y = var_4017_to_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; + int32 var_4030 = const()[name = string("op_4030"), val = int32(-1)]; + fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4032_cast_fp16 = mul(x = hidden_states_63_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4032_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_4030, interleave = input_73_interleave_0, values = (hidden_states_63_cast_fp16, var_4032_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_4027_to_fp16 = const()[name = string("op_4027_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4027_to_fp16, x = input_73_cast_fp16)[name = string("normed_89_cast_fp16")]; + tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_91_cast_fp16 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91_cast_fp16")]; + tensor var_4046_to_fp16 = const()[name = string("op_4046_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438637120)))]; + tensor x_61_cast_fp16 = mul(x = normed_91_cast_fp16, y = var_4046_to_fp16)[name = string("x_61_cast_fp16")]; + tensor var_4058 = const()[name = string("op_4058"), val = tensor([0, 2, 1])]; + tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; + tensor var_4059_cast_fp16 = transpose(perm = var_4058, x = x_61_cast_fp16)[name = string("transpose_150")]; + tensor input_75_cast_fp16 = expand_dims(axes = input_75_axes_0, x = var_4059_cast_fp16)[name = string("input_75_cast_fp16")]; + string x_63_pad_type_0 = const()[name = string("x_63_pad_type_0"), val = string("valid")]; + tensor x_63_strides_0 = const()[name = string("x_63_strides_0"), val = tensor([1, 1])]; + tensor x_63_pad_0 = const()[name = string("x_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_63_dilations_0 = const()[name = string("x_63_dilations_0"), val = tensor([1, 1])]; + int32 x_63_groups_0 = const()[name = string("x_63_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438639488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444611520))))[name = string("model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_63_cast_fp16 = conv(dilations = x_63_dilations_0, groups = x_63_groups_0, pad = x_63_pad_0, pad_type = x_63_pad_type_0, strides = x_63_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("x_63_cast_fp16")]; + string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; + tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; + tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; + int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444832768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450804800))))[name = string("model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_7_cast_fp16 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("b_7_cast_fp16")]; + string var_4084_mode_0 = const()[name = string("op_4084_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4084_cast_fp16 = gelu(mode = var_4084_mode_0, x = x_63_cast_fp16)[name = string("op_4084_cast_fp16")]; + tensor input_77_cast_fp16 = mul(x = var_4084_cast_fp16, y = b_7_cast_fp16)[name = string("input_77_cast_fp16")]; + string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; + tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; + tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; + int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451026048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456998080))))[name = string("model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_7_cast_fp16 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_77_cast_fp16)[name = string("e_7_cast_fp16")]; + tensor var_4092_axes_0 = const()[name = string("op_4092_axes_0"), val = tensor([2])]; + tensor var_4092_cast_fp16 = squeeze(axes = var_4092_axes_0, x = e_7_cast_fp16)[name = string("op_4092_cast_fp16")]; + tensor var_4093 = const()[name = string("op_4093"), val = tensor([0, 2, 1])]; + int32 var_4104 = const()[name = string("op_4104"), val = int32(-1)]; + fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_65_cast_fp16 = transpose(perm = var_4093, x = var_4092_cast_fp16)[name = string("transpose_149")]; + tensor var_4106_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_148_promoted_to_fp16)[name = string("op_4106_cast_fp16")]; + bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; + tensor input_79_cast_fp16 = concat(axis = var_4104, interleave = input_79_interleave_0, values = (hidden_states_65_cast_fp16, var_4106_cast_fp16))[name = string("input_79_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_4101_to_fp16 = const()[name = string("op_4101_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4101_to_fp16, x = input_79_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; + tensor var_4120_to_fp16 = const()[name = string("op_4120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457035008)))]; + tensor hidden_states_67_cast_fp16 = mul(x = normed_95_cast_fp16, y = var_4120_to_fp16)[name = string("hidden_states_67_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_63_cast_fp16, y = hidden_states_67_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + int32 var_4171 = const()[name = string("op_4171"), val = int32(-1)]; + fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4173_cast_fp16 = mul(x = hidden_states_69_cast_fp16, y = const_152_promoted_to_fp16)[name = string("op_4173_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_4171, interleave = input_81_interleave_0, values = (hidden_states_69_cast_fp16, var_4173_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_4168_to_fp16 = const()[name = string("op_4168_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4168_to_fp16, x = input_81_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; + tensor var_4187_to_fp16 = const()[name = string("op_4187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457037376)))]; + tensor hidden_states_71_cast_fp16 = mul(x = normed_99_cast_fp16, y = var_4187_to_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor var_4192 = const()[name = string("op_4192"), val = tensor([0, 2, 1])]; + tensor var_4195_axes_0 = const()[name = string("op_4195_axes_0"), val = tensor([2])]; + tensor var_4193_cast_fp16 = transpose(perm = var_4192, x = hidden_states_71_cast_fp16)[name = string("transpose_148")]; + tensor var_4195_cast_fp16 = expand_dims(axes = var_4195_axes_0, x = var_4193_cast_fp16)[name = string("op_4195_cast_fp16")]; + string var_4211_pad_type_0 = const()[name = string("op_4211_pad_type_0"), val = string("valid")]; + tensor var_4211_strides_0 = const()[name = string("op_4211_strides_0"), val = tensor([1, 1])]; + tensor var_4211_pad_0 = const()[name = string("op_4211_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4211_dilations_0 = const()[name = string("op_4211_dilations_0"), val = tensor([1, 1])]; + int32 var_4211_groups_0 = const()[name = string("op_4211_groups_0"), val = int32(1)]; + tensor var_4211 = conv(dilations = var_4211_dilations_0, groups = var_4211_groups_0, pad = var_4211_pad_0, pad_type = var_4211_pad_type_0, strides = var_4211_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_4195_cast_fp16)[name = string("op_4211")]; + tensor var_4216 = const()[name = string("op_4216"), val = tensor([1, 4, 1, 256])]; + tensor var_4217 = reshape(shape = var_4216, x = var_4211)[name = string("op_4217")]; + string var_4233_pad_type_0 = const()[name = string("op_4233_pad_type_0"), val = string("valid")]; + tensor var_4233_strides_0 = const()[name = string("op_4233_strides_0"), val = tensor([1, 1])]; + tensor var_4233_pad_0 = const()[name = string("op_4233_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4233_dilations_0 = const()[name = string("op_4233_dilations_0"), val = tensor([1, 1])]; + int32 var_4233_groups_0 = const()[name = string("op_4233_groups_0"), val = int32(1)]; + tensor var_4233 = conv(dilations = var_4233_dilations_0, groups = var_4233_groups_0, pad = var_4233_pad_0, pad_type = var_4233_pad_type_0, strides = var_4233_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_4195_cast_fp16)[name = string("op_4233")]; + tensor var_4238 = const()[name = string("op_4238"), val = tensor([1, 1, 1, 256])]; + tensor var_4239 = reshape(shape = var_4238, x = var_4233)[name = string("op_4239")]; + string var_4255_pad_type_0 = const()[name = string("op_4255_pad_type_0"), val = string("valid")]; + tensor var_4255_strides_0 = const()[name = string("op_4255_strides_0"), val = tensor([1, 1])]; + tensor var_4255_pad_0 = const()[name = string("op_4255_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4255_dilations_0 = const()[name = string("op_4255_dilations_0"), val = tensor([1, 1])]; + int32 var_4255_groups_0 = const()[name = string("op_4255_groups_0"), val = int32(1)]; + tensor var_4255 = conv(dilations = var_4255_dilations_0, groups = var_4255_groups_0, pad = var_4255_pad_0, pad_type = var_4255_pad_type_0, strides = var_4255_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_4195_cast_fp16)[name = string("op_4255")]; + tensor var_4260 = const()[name = string("op_4260"), val = tensor([1, 1, 1, 256])]; + tensor var_4261 = reshape(shape = var_4260, x = var_4255)[name = string("op_4261")]; + int32 var_4276 = const()[name = string("op_4276"), val = int32(-1)]; + fp16 const_156_promoted = const()[name = string("const_156_promoted"), val = fp16(-0x1p+0)]; + tensor var_4278 = mul(x = var_4217, y = const_156_promoted)[name = string("op_4278")]; + bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; + tensor input_85 = concat(axis = var_4276, interleave = input_85_interleave_0, values = (var_4217, var_4278))[name = string("input_85")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_4273_to_fp16 = const()[name = string("op_4273_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4273_to_fp16, x = input_85)[name = string("normed_101_cast_fp16")]; + tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; + tensor var_4292_to_fp16 = const()[name = string("op_4292_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457039744)))]; + tensor q_9_cast_fp16 = mul(x = normed_103, y = var_4292_to_fp16)[name = string("q_9_cast_fp16")]; + int32 var_4303 = const()[name = string("op_4303"), val = int32(-1)]; + fp16 const_160_promoted = const()[name = string("const_160_promoted"), val = fp16(-0x1p+0)]; + tensor var_4305 = mul(x = var_4239, y = const_160_promoted)[name = string("op_4305")]; + bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; + tensor input_87 = concat(axis = var_4303, interleave = input_87_interleave_0, values = (var_4239, var_4305))[name = string("input_87")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_4300_to_fp16 = const()[name = string("op_4300_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4300_to_fp16, x = input_87)[name = string("normed_105_cast_fp16")]; + tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; + tensor var_4319_to_fp16 = const()[name = string("op_4319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457040320)))]; + tensor k_9_cast_fp16 = mul(x = normed_107, y = var_4319_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_4321_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4321_cast_fp16")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; + fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4342_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_166_promoted_to_fp16)[name = string("op_4342_cast_fp16")]; + int32 var_4344 = const()[name = string("op_4344"), val = int32(-1)]; + bool var_4345_interleave_0 = const()[name = string("op_4345_interleave_0"), val = bool(false)]; + tensor var_4345_cast_fp16 = concat(axis = var_4344, interleave = var_4345_interleave_0, values = (var_4342_cast_fp16, x1_17_cast_fp16))[name = string("op_4345_cast_fp16")]; + tensor var_4346_cast_fp16 = mul(x = var_4345_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4346_cast_fp16")]; + tensor query_states_17_cast_fp16 = add(x = var_4321_cast_fp16, y = var_4346_cast_fp16)[name = string("query_states_17_cast_fp16")]; + tensor var_4349_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4349_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; + fp16 const_169_promoted_to_fp16 = const()[name = string("const_169_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4370_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_169_promoted_to_fp16)[name = string("op_4370_cast_fp16")]; + int32 var_4372 = const()[name = string("op_4372"), val = int32(-1)]; + bool var_4373_interleave_0 = const()[name = string("op_4373_interleave_0"), val = bool(false)]; + tensor var_4373_cast_fp16 = concat(axis = var_4372, interleave = var_4373_interleave_0, values = (var_4370_cast_fp16, x1_19_cast_fp16))[name = string("op_4373_cast_fp16")]; + tensor var_4374_cast_fp16 = mul(x = var_4373_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4374_cast_fp16")]; + tensor key_states_17_cast_fp16 = add(x = var_4349_cast_fp16, y = var_4374_cast_fp16)[name = string("key_states_17_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; + tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_1955, concat_35_values3_0))[name = string("concat_35")]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_9_stride_0, update = key_states_17_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_8")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([26])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([27])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_1955, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_10_stride_0, update = var_4261, x = coreml_update_state_60)[name = string("model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_9")]; + tensor var_4429_begin_0 = const()[name = string("op_4429_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_4429_end_0 = const()[name = string("op_4429_end_0"), val = tensor([5, 1, 512, 256])]; + tensor var_4429_end_mask_0 = const()[name = string("op_4429_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4429_cast_fp16 = slice_by_index(begin = var_4429_begin_0, end = var_4429_end_0, end_mask = var_4429_end_mask_0, x = coreml_update_state_61)[name = string("op_4429_cast_fp16")]; + tensor var_4436_begin_0 = const()[name = string("op_4436_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor var_4436_end_0 = const()[name = string("op_4436_end_0"), val = tensor([27, 1, 512, 256])]; + tensor var_4436_end_mask_0 = const()[name = string("op_4436_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4436_cast_fp16 = slice_by_index(begin = var_4436_begin_0, end = var_4436_end_0, end_mask = var_4436_end_mask_0, x = coreml_update_state_61)[name = string("op_4436_cast_fp16")]; + tensor var_4473 = const()[name = string("op_4473"), val = tensor([1, 4, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_4473, x = var_4429_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_4493 = const()[name = string("op_4493"), val = tensor([1, 4, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_4493, x = var_4436_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_4520_transpose_x_1 = const()[name = string("op_4520_transpose_x_1"), val = bool(false)]; + bool var_4520_transpose_y_1 = const()[name = string("op_4520_transpose_y_1"), val = bool(true)]; + tensor var_4520 = matmul(transpose_x = var_4520_transpose_x_1, transpose_y = var_4520_transpose_y_1, x = query_states_17_cast_fp16, y = x_69_cast_fp16)[name = string("op_4520")]; + fp16 var_4521_to_fp16 = const()[name = string("op_4521_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_4520, y = var_4521_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = var_2129)[name = string("attn_weights_27_cast_fp16")]; + int32 var_4556 = const()[name = string("op_4556"), val = int32(-1)]; + tensor attn_weights_29_cast_fp16 = softmax(axis = var_4556, x = attn_weights_27_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; + bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; + tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = attn_weights_29_cast_fp16, y = x_75_cast_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor var_4567_perm_0 = const()[name = string("op_4567_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4571 = const()[name = string("op_4571"), val = tensor([1, 1, 1024])]; + tensor var_4567_cast_fp16 = transpose(perm = var_4567_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_147")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_4571, x = var_4567_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_4576 = const()[name = string("op_4576"), val = tensor([0, 2, 1])]; + string var_4592_pad_type_0 = const()[name = string("op_4592_pad_type_0"), val = string("valid")]; + int32 var_4592_groups_0 = const()[name = string("op_4592_groups_0"), val = int32(1)]; + tensor var_4592_strides_0 = const()[name = string("op_4592_strides_0"), val = tensor([1])]; + tensor var_4592_pad_0 = const()[name = string("op_4592_pad_0"), val = tensor([0, 0])]; + tensor var_4592_dilations_0 = const()[name = string("op_4592_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457040896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457925696))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4577_cast_fp16 = transpose(perm = var_4576, x = attn_output_45_cast_fp16)[name = string("transpose_146")]; + tensor var_4592_cast_fp16 = conv(dilations = var_4592_dilations_0, groups = var_4592_groups_0, pad = var_4592_pad_0, pad_type = var_4592_pad_type_0, strides = var_4592_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_4577_cast_fp16)[name = string("op_4592_cast_fp16")]; + tensor var_4596 = const()[name = string("op_4596"), val = tensor([0, 2, 1])]; + int32 var_4607 = const()[name = string("op_4607"), val = int32(-1)]; + fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_77_cast_fp16 = transpose(perm = var_4596, x = var_4592_cast_fp16)[name = string("transpose_145")]; + tensor var_4609_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_4609_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_4607, interleave = input_91_interleave_0, values = (hidden_states_77_cast_fp16, var_4609_cast_fp16))[name = string("input_91_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_4604_to_fp16 = const()[name = string("op_4604_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4604_to_fp16, x = input_91_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; + tensor var_4623_to_fp16 = const()[name = string("op_4623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457962624)))]; + tensor attn_output_49_cast_fp16 = mul(x = normed_111_cast_fp16, y = var_4623_to_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + int32 var_4636 = const()[name = string("op_4636"), val = int32(-1)]; + fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4638_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = const_182_promoted_to_fp16)[name = string("op_4638_cast_fp16")]; + bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; + tensor input_93_cast_fp16 = concat(axis = var_4636, interleave = input_93_interleave_0, values = (hidden_states_79_cast_fp16, var_4638_cast_fp16))[name = string("input_93_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_4633_to_fp16 = const()[name = string("op_4633_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_4633_to_fp16, x = input_93_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; + tensor var_4652_to_fp16 = const()[name = string("op_4652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457964992)))]; + tensor x_77_cast_fp16 = mul(x = normed_115_cast_fp16, y = var_4652_to_fp16)[name = string("x_77_cast_fp16")]; + tensor var_4664 = const()[name = string("op_4664"), val = tensor([0, 2, 1])]; + tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; + tensor var_4665_cast_fp16 = transpose(perm = var_4664, x = x_77_cast_fp16)[name = string("transpose_144")]; + tensor input_95_cast_fp16 = expand_dims(axes = input_95_axes_0, x = var_4665_cast_fp16)[name = string("input_95_cast_fp16")]; + string x_79_pad_type_0 = const()[name = string("x_79_pad_type_0"), val = string("valid")]; + tensor x_79_strides_0 = const()[name = string("x_79_strides_0"), val = tensor([1, 1])]; + tensor x_79_pad_0 = const()[name = string("x_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_79_dilations_0 = const()[name = string("x_79_dilations_0"), val = tensor([1, 1])]; + int32 x_79_groups_0 = const()[name = string("x_79_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457967360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463939392))))[name = string("model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_79_cast_fp16 = conv(dilations = x_79_dilations_0, groups = x_79_groups_0, pad = x_79_pad_0, pad_type = x_79_pad_type_0, strides = x_79_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("x_79_cast_fp16")]; + string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; + tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; + tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; + int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464160640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470132672))))[name = string("model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_9_cast_fp16 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("b_9_cast_fp16")]; + string var_4690_mode_0 = const()[name = string("op_4690_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4690_cast_fp16 = gelu(mode = var_4690_mode_0, x = x_79_cast_fp16)[name = string("op_4690_cast_fp16")]; + tensor input_97_cast_fp16 = mul(x = var_4690_cast_fp16, y = b_9_cast_fp16)[name = string("input_97_cast_fp16")]; + string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; + tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; + tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; + int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470353920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476325952))))[name = string("model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_9_cast_fp16 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_97_cast_fp16)[name = string("e_9_cast_fp16")]; + tensor var_4698_axes_0 = const()[name = string("op_4698_axes_0"), val = tensor([2])]; + tensor var_4698_cast_fp16 = squeeze(axes = var_4698_axes_0, x = e_9_cast_fp16)[name = string("op_4698_cast_fp16")]; + tensor var_4699 = const()[name = string("op_4699"), val = tensor([0, 2, 1])]; + int32 var_4710 = const()[name = string("op_4710"), val = int32(-1)]; + fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_81_cast_fp16 = transpose(perm = var_4699, x = var_4698_cast_fp16)[name = string("transpose_143")]; + tensor var_4712_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_186_promoted_to_fp16)[name = string("op_4712_cast_fp16")]; + bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; + tensor input_99_cast_fp16 = concat(axis = var_4710, interleave = input_99_interleave_0, values = (hidden_states_81_cast_fp16, var_4712_cast_fp16))[name = string("input_99_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_4707_to_fp16 = const()[name = string("op_4707_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_4707_to_fp16, x = input_99_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_119_cast_fp16 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119_cast_fp16")]; + tensor var_4726_to_fp16 = const()[name = string("op_4726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476362880)))]; + tensor hidden_states_83_cast_fp16 = mul(x = normed_119_cast_fp16, y = var_4726_to_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor hidden_states_85_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; + int32 var_4738_axis_0 = const()[name = string("op_4738_axis_0"), val = int32(1)]; + int32 var_4738_batch_dims_0 = const()[name = string("op_4738_batch_dims_0"), val = int32(0)]; + bool var_4738_validate_indices_0 = const()[name = string("op_4738_validate_indices_0"), val = bool(false)]; + tensor var_4730_to_fp16 = const()[name = string("op_4730_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476365248)))]; + tensor var_4738_cast_fp16_cast_uint16 = gather(axis = var_4738_axis_0, batch_dims = var_4738_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_4738_validate_indices_0, x = var_4730_to_fp16)[name = string("op_4738_cast_fp16_cast_uint16")]; + tensor var_4743 = const()[name = string("op_4743"), val = tensor([1, 1, 1, -1])]; + tensor sin_21_cast_fp16 = reshape(shape = var_4743, x = var_4738_cast_fp16_cast_uint16)[name = string("sin_21_cast_fp16")]; + int32 var_4753_axis_0 = const()[name = string("op_4753_axis_0"), val = int32(1)]; + int32 var_4753_batch_dims_0 = const()[name = string("op_4753_batch_dims_0"), val = int32(0)]; + bool var_4753_validate_indices_0 = const()[name = string("op_4753_validate_indices_0"), val = bool(false)]; + tensor var_4745_to_fp16 = const()[name = string("op_4745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480559616)))]; + tensor var_4753_cast_fp16_cast_uint16 = gather(axis = var_4753_axis_0, batch_dims = var_4753_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_4753_validate_indices_0, x = var_4745_to_fp16)[name = string("op_4753_cast_fp16_cast_uint16")]; + tensor var_4758 = const()[name = string("op_4758"), val = tensor([1, 1, 1, -1])]; + tensor cos_21_cast_fp16 = reshape(shape = var_4758, x = var_4753_cast_fp16_cast_uint16)[name = string("cos_21_cast_fp16")]; + int32 var_4779 = const()[name = string("op_4779"), val = int32(-1)]; + fp16 const_190_promoted_to_fp16 = const()[name = string("const_190_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4781_cast_fp16 = mul(x = hidden_states_85_cast_fp16, y = const_190_promoted_to_fp16)[name = string("op_4781_cast_fp16")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101_cast_fp16 = concat(axis = var_4779, interleave = input_101_interleave_0, values = (hidden_states_85_cast_fp16, var_4781_cast_fp16))[name = string("input_101_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_4776_to_fp16 = const()[name = string("op_4776_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_4776_to_fp16, x = input_101_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_123_cast_fp16 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123_cast_fp16")]; + tensor var_4795_to_fp16 = const()[name = string("op_4795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484753984)))]; + tensor hidden_states_87_cast_fp16 = mul(x = normed_123_cast_fp16, y = var_4795_to_fp16)[name = string("hidden_states_87_cast_fp16")]; + tensor var_4800 = const()[name = string("op_4800"), val = tensor([0, 2, 1])]; + tensor var_4803_axes_0 = const()[name = string("op_4803_axes_0"), val = tensor([2])]; + tensor var_4801_cast_fp16 = transpose(perm = var_4800, x = hidden_states_87_cast_fp16)[name = string("transpose_142")]; + tensor var_4803_cast_fp16 = expand_dims(axes = var_4803_axes_0, x = var_4801_cast_fp16)[name = string("op_4803_cast_fp16")]; + string var_4819_pad_type_0 = const()[name = string("op_4819_pad_type_0"), val = string("valid")]; + tensor var_4819_strides_0 = const()[name = string("op_4819_strides_0"), val = tensor([1, 1])]; + tensor var_4819_pad_0 = const()[name = string("op_4819_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4819_dilations_0 = const()[name = string("op_4819_dilations_0"), val = tensor([1, 1])]; + int32 var_4819_groups_0 = const()[name = string("op_4819_groups_0"), val = int32(1)]; + tensor var_4819 = conv(dilations = var_4819_dilations_0, groups = var_4819_groups_0, pad = var_4819_pad_0, pad_type = var_4819_pad_type_0, strides = var_4819_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_4803_cast_fp16)[name = string("op_4819")]; + tensor var_4824 = const()[name = string("op_4824"), val = tensor([1, 4, 1, 256])]; + tensor var_4825 = reshape(shape = var_4824, x = var_4819)[name = string("op_4825")]; + string var_4841_pad_type_0 = const()[name = string("op_4841_pad_type_0"), val = string("valid")]; + tensor var_4841_strides_0 = const()[name = string("op_4841_strides_0"), val = tensor([1, 1])]; + tensor var_4841_pad_0 = const()[name = string("op_4841_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4841_dilations_0 = const()[name = string("op_4841_dilations_0"), val = tensor([1, 1])]; + int32 var_4841_groups_0 = const()[name = string("op_4841_groups_0"), val = int32(1)]; + tensor var_4841 = conv(dilations = var_4841_dilations_0, groups = var_4841_groups_0, pad = var_4841_pad_0, pad_type = var_4841_pad_type_0, strides = var_4841_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_4803_cast_fp16)[name = string("op_4841")]; + tensor var_4846 = const()[name = string("op_4846"), val = tensor([1, 1, 1, 256])]; + tensor var_4847 = reshape(shape = var_4846, x = var_4841)[name = string("op_4847")]; + string var_4863_pad_type_0 = const()[name = string("op_4863_pad_type_0"), val = string("valid")]; + tensor var_4863_strides_0 = const()[name = string("op_4863_strides_0"), val = tensor([1, 1])]; + tensor var_4863_pad_0 = const()[name = string("op_4863_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4863_dilations_0 = const()[name = string("op_4863_dilations_0"), val = tensor([1, 1])]; + int32 var_4863_groups_0 = const()[name = string("op_4863_groups_0"), val = int32(1)]; + tensor var_4863 = conv(dilations = var_4863_dilations_0, groups = var_4863_groups_0, pad = var_4863_pad_0, pad_type = var_4863_pad_type_0, strides = var_4863_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_4803_cast_fp16)[name = string("op_4863")]; + tensor var_4868 = const()[name = string("op_4868"), val = tensor([1, 1, 1, 256])]; + tensor var_4869 = reshape(shape = var_4868, x = var_4863)[name = string("op_4869")]; + int32 var_4884 = const()[name = string("op_4884"), val = int32(-1)]; + fp16 const_194_promoted = const()[name = string("const_194_promoted"), val = fp16(-0x1p+0)]; + tensor var_4886 = mul(x = var_4825, y = const_194_promoted)[name = string("op_4886")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105 = concat(axis = var_4884, interleave = input_105_interleave_0, values = (var_4825, var_4886))[name = string("input_105")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_4881_to_fp16 = const()[name = string("op_4881_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_4881_to_fp16, x = input_105)[name = string("normed_125_cast_fp16")]; + tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_127 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127")]; + tensor var_4900_to_fp16 = const()[name = string("op_4900_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484756352)))]; + tensor q_11_cast_fp16 = mul(x = normed_127, y = var_4900_to_fp16)[name = string("q_11_cast_fp16")]; + int32 var_4911 = const()[name = string("op_4911"), val = int32(-1)]; + fp16 const_198_promoted = const()[name = string("const_198_promoted"), val = fp16(-0x1p+0)]; + tensor var_4913 = mul(x = var_4847, y = const_198_promoted)[name = string("op_4913")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107 = concat(axis = var_4911, interleave = input_107_interleave_0, values = (var_4847, var_4913))[name = string("input_107")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_4908_to_fp16 = const()[name = string("op_4908_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_4908_to_fp16, x = input_107)[name = string("normed_129_cast_fp16")]; + tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_131 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131")]; + tensor var_4927_to_fp16 = const()[name = string("op_4927_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484756928)))]; + tensor k_11_cast_fp16 = mul(x = normed_131, y = var_4927_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_4929_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_21_cast_fp16)[name = string("op_4929_cast_fp16")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; + fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4950_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_4950_cast_fp16")]; + int32 var_4952 = const()[name = string("op_4952"), val = int32(-1)]; + bool var_4953_interleave_0 = const()[name = string("op_4953_interleave_0"), val = bool(false)]; + tensor var_4953_cast_fp16 = concat(axis = var_4952, interleave = var_4953_interleave_0, values = (var_4950_cast_fp16, x1_21_cast_fp16))[name = string("op_4953_cast_fp16")]; + tensor var_4954_cast_fp16 = mul(x = var_4953_cast_fp16, y = sin_21_cast_fp16)[name = string("op_4954_cast_fp16")]; + tensor query_states_21_cast_fp16 = add(x = var_4929_cast_fp16, y = var_4954_cast_fp16)[name = string("query_states_21_cast_fp16")]; + tensor var_4957_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_21_cast_fp16)[name = string("op_4957_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; + fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4978_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_207_promoted_to_fp16)[name = string("op_4978_cast_fp16")]; + int32 var_4980 = const()[name = string("op_4980"), val = int32(-1)]; + bool var_4981_interleave_0 = const()[name = string("op_4981_interleave_0"), val = bool(false)]; + tensor var_4981_cast_fp16 = concat(axis = var_4980, interleave = var_4981_interleave_0, values = (var_4978_cast_fp16, x1_23_cast_fp16))[name = string("op_4981_cast_fp16")]; + tensor var_4982_cast_fp16 = mul(x = var_4981_cast_fp16, y = sin_21_cast_fp16)[name = string("op_4982_cast_fp16")]; + tensor key_states_21_cast_fp16 = add(x = var_4957_cast_fp16, y = var_4982_cast_fp16)[name = string("key_states_21_cast_fp16")]; + tensor read_state_1 = read_state(input = model_model_kv_cache_global)[name = string("read_state_1")]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_1_stride_0, update = key_states_21_cast_fp16, x = read_state_1)[name = string("model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_10")]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_2_stride_0, update = var_4869, x = coreml_update_state_62)[name = string("model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_11")]; + tensor var_5037_begin_0 = const()[name = string("op_5037_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5037_end_0 = const()[name = string("op_5037_end_0"), val = tensor([1, 1, 4096, 256])]; + tensor var_5037_end_mask_0 = const()[name = string("op_5037_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5037_cast_fp16 = slice_by_index(begin = var_5037_begin_0, end = var_5037_end_0, end_mask = var_5037_end_mask_0, x = coreml_update_state_63)[name = string("op_5037_cast_fp16")]; + tensor var_5044_begin_0 = const()[name = string("op_5044_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_5044_end_0 = const()[name = string("op_5044_end_0"), val = tensor([5, 1, 4096, 256])]; + tensor var_5044_end_mask_0 = const()[name = string("op_5044_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5044_cast_fp16 = slice_by_index(begin = var_5044_begin_0, end = var_5044_end_0, end_mask = var_5044_end_mask_0, x = coreml_update_state_63)[name = string("op_5044_cast_fp16")]; + tensor var_5081 = const()[name = string("op_5081"), val = tensor([1, 4, 1, 1])]; + tensor x_85_cast_fp16 = tile(reps = var_5081, x = var_5037_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_5101 = const()[name = string("op_5101"), val = tensor([1, 4, 1, 1])]; + tensor x_91_cast_fp16 = tile(reps = var_5101, x = var_5044_cast_fp16)[name = string("x_91_cast_fp16")]; + bool var_5128_transpose_x_1 = const()[name = string("op_5128_transpose_x_1"), val = bool(false)]; + bool var_5128_transpose_y_1 = const()[name = string("op_5128_transpose_y_1"), val = bool(true)]; + tensor var_5128 = matmul(transpose_x = var_5128_transpose_x_1, transpose_y = var_5128_transpose_y_1, x = query_states_21_cast_fp16, y = x_85_cast_fp16)[name = string("op_5128")]; + fp16 var_5129_to_fp16 = const()[name = string("op_5129_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_31_cast_fp16 = mul(x = var_5128, y = var_5129_to_fp16)[name = string("attn_weights_31_cast_fp16")]; + tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; + int32 var_5164 = const()[name = string("op_5164"), val = int32(-1)]; + tensor attn_weights_35_cast_fp16 = softmax(axis = var_5164, x = attn_weights_33_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; + bool attn_output_51_transpose_x_0 = const()[name = string("attn_output_51_transpose_x_0"), val = bool(false)]; + bool attn_output_51_transpose_y_0 = const()[name = string("attn_output_51_transpose_y_0"), val = bool(false)]; + tensor attn_output_51_cast_fp16 = matmul(transpose_x = attn_output_51_transpose_x_0, transpose_y = attn_output_51_transpose_y_0, x = attn_weights_35_cast_fp16, y = x_91_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_5175_perm_0 = const()[name = string("op_5175_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5179 = const()[name = string("op_5179"), val = tensor([1, 1, 1024])]; + tensor var_5175_cast_fp16 = transpose(perm = var_5175_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_141")]; + tensor attn_output_55_cast_fp16 = reshape(shape = var_5179, x = var_5175_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_5184 = const()[name = string("op_5184"), val = tensor([0, 2, 1])]; + string var_5200_pad_type_0 = const()[name = string("op_5200_pad_type_0"), val = string("valid")]; + int32 var_5200_groups_0 = const()[name = string("op_5200_groups_0"), val = int32(1)]; + tensor var_5200_strides_0 = const()[name = string("op_5200_strides_0"), val = tensor([1])]; + tensor var_5200_pad_0 = const()[name = string("op_5200_pad_0"), val = tensor([0, 0])]; + tensor var_5200_dilations_0 = const()[name = string("op_5200_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484757504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485642304))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5185_cast_fp16 = transpose(perm = var_5184, x = attn_output_55_cast_fp16)[name = string("transpose_140")]; + tensor var_5200_cast_fp16 = conv(dilations = var_5200_dilations_0, groups = var_5200_groups_0, pad = var_5200_pad_0, pad_type = var_5200_pad_type_0, strides = var_5200_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_5185_cast_fp16)[name = string("op_5200_cast_fp16")]; + tensor var_5204 = const()[name = string("op_5204"), val = tensor([0, 2, 1])]; + int32 var_5215 = const()[name = string("op_5215"), val = int32(-1)]; + fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_93_cast_fp16 = transpose(perm = var_5204, x = var_5200_cast_fp16)[name = string("transpose_139")]; + tensor var_5217_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_5217_cast_fp16")]; + bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; + tensor input_111_cast_fp16 = concat(axis = var_5215, interleave = input_111_interleave_0, values = (hidden_states_93_cast_fp16, var_5217_cast_fp16))[name = string("input_111_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_5212_to_fp16 = const()[name = string("op_5212_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5212_to_fp16, x = input_111_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_135_cast_fp16 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135_cast_fp16")]; + tensor var_5231_to_fp16 = const()[name = string("op_5231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485679232)))]; + tensor attn_output_59_cast_fp16 = mul(x = normed_135_cast_fp16, y = var_5231_to_fp16)[name = string("attn_output_59_cast_fp16")]; + tensor hidden_states_95_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; + int32 var_5244 = const()[name = string("op_5244"), val = int32(-1)]; + fp16 const_220_promoted_to_fp16 = const()[name = string("const_220_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5246_cast_fp16 = mul(x = hidden_states_95_cast_fp16, y = const_220_promoted_to_fp16)[name = string("op_5246_cast_fp16")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113_cast_fp16 = concat(axis = var_5244, interleave = input_113_interleave_0, values = (hidden_states_95_cast_fp16, var_5246_cast_fp16))[name = string("input_113_cast_fp16")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_5241_to_fp16 = const()[name = string("op_5241_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5241_to_fp16, x = input_113_cast_fp16)[name = string("normed_137_cast_fp16")]; + tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_139_cast_fp16 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139_cast_fp16")]; + tensor var_5260_to_fp16 = const()[name = string("op_5260_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485681600)))]; + tensor x_93_cast_fp16 = mul(x = normed_139_cast_fp16, y = var_5260_to_fp16)[name = string("x_93_cast_fp16")]; + tensor var_5272 = const()[name = string("op_5272"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_5273_cast_fp16 = transpose(perm = var_5272, x = x_93_cast_fp16)[name = string("transpose_138")]; + tensor input_115_cast_fp16 = expand_dims(axes = input_115_axes_0, x = var_5273_cast_fp16)[name = string("input_115_cast_fp16")]; + string x_95_pad_type_0 = const()[name = string("x_95_pad_type_0"), val = string("valid")]; + tensor x_95_strides_0 = const()[name = string("x_95_strides_0"), val = tensor([1, 1])]; + tensor x_95_pad_0 = const()[name = string("x_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_95_dilations_0 = const()[name = string("x_95_dilations_0"), val = tensor([1, 1])]; + int32 x_95_groups_0 = const()[name = string("x_95_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485683968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491656000))))[name = string("model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_95_cast_fp16 = conv(dilations = x_95_dilations_0, groups = x_95_groups_0, pad = x_95_pad_0, pad_type = x_95_pad_type_0, strides = x_95_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("x_95_cast_fp16")]; + string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; + tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; + tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; + int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491877248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497849280))))[name = string("model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_11_cast_fp16 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("b_11_cast_fp16")]; + string var_5298_mode_0 = const()[name = string("op_5298_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_5298_cast_fp16 = gelu(mode = var_5298_mode_0, x = x_95_cast_fp16)[name = string("op_5298_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = var_5298_cast_fp16, y = b_11_cast_fp16)[name = string("input_117_cast_fp16")]; + string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; + tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; + tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; + int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498070528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504042560))))[name = string("model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_11_cast_fp16 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("e_11_cast_fp16")]; + tensor var_5306_axes_0 = const()[name = string("op_5306_axes_0"), val = tensor([2])]; + tensor var_5306_cast_fp16 = squeeze(axes = var_5306_axes_0, x = e_11_cast_fp16)[name = string("op_5306_cast_fp16")]; + tensor var_5307 = const()[name = string("op_5307"), val = tensor([0, 2, 1])]; + int32 var_5318 = const()[name = string("op_5318"), val = int32(-1)]; + fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_97_cast_fp16 = transpose(perm = var_5307, x = var_5306_cast_fp16)[name = string("transpose_137")]; + tensor var_5320_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_5320_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_5318, interleave = input_119_interleave_0, values = (hidden_states_97_cast_fp16, var_5320_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_5315_to_fp16 = const()[name = string("op_5315_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_5315_to_fp16, x = input_119_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; + tensor var_5334_to_fp16 = const()[name = string("op_5334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504079488)))]; + tensor hidden_states_99_cast_fp16 = mul(x = normed_143_cast_fp16, y = var_5334_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + int32 var_5385 = const()[name = string("op_5385"), val = int32(-1)]; + fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5387_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_228_promoted_to_fp16)[name = string("op_5387_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_5385, interleave = input_121_interleave_0, values = (hidden_states_101_cast_fp16, var_5387_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_5382_to_fp16 = const()[name = string("op_5382_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_5382_to_fp16, x = input_121_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; + tensor var_5401_to_fp16 = const()[name = string("op_5401_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504081856)))]; + tensor hidden_states_103_cast_fp16 = mul(x = normed_147_cast_fp16, y = var_5401_to_fp16)[name = string("hidden_states_103_cast_fp16")]; + tensor var_5406 = const()[name = string("op_5406"), val = tensor([0, 2, 1])]; + tensor var_5409_axes_0 = const()[name = string("op_5409_axes_0"), val = tensor([2])]; + tensor var_5407_cast_fp16 = transpose(perm = var_5406, x = hidden_states_103_cast_fp16)[name = string("transpose_136")]; + tensor var_5409_cast_fp16 = expand_dims(axes = var_5409_axes_0, x = var_5407_cast_fp16)[name = string("op_5409_cast_fp16")]; + string var_5425_pad_type_0 = const()[name = string("op_5425_pad_type_0"), val = string("valid")]; + tensor var_5425_strides_0 = const()[name = string("op_5425_strides_0"), val = tensor([1, 1])]; + tensor var_5425_pad_0 = const()[name = string("op_5425_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5425_dilations_0 = const()[name = string("op_5425_dilations_0"), val = tensor([1, 1])]; + int32 var_5425_groups_0 = const()[name = string("op_5425_groups_0"), val = int32(1)]; + tensor var_5425 = conv(dilations = var_5425_dilations_0, groups = var_5425_groups_0, pad = var_5425_pad_0, pad_type = var_5425_pad_type_0, strides = var_5425_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_5409_cast_fp16)[name = string("op_5425")]; + tensor var_5430 = const()[name = string("op_5430"), val = tensor([1, 4, 1, 256])]; + tensor var_5431 = reshape(shape = var_5430, x = var_5425)[name = string("op_5431")]; + string var_5447_pad_type_0 = const()[name = string("op_5447_pad_type_0"), val = string("valid")]; + tensor var_5447_strides_0 = const()[name = string("op_5447_strides_0"), val = tensor([1, 1])]; + tensor var_5447_pad_0 = const()[name = string("op_5447_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5447_dilations_0 = const()[name = string("op_5447_dilations_0"), val = tensor([1, 1])]; + int32 var_5447_groups_0 = const()[name = string("op_5447_groups_0"), val = int32(1)]; + tensor var_5447 = conv(dilations = var_5447_dilations_0, groups = var_5447_groups_0, pad = var_5447_pad_0, pad_type = var_5447_pad_type_0, strides = var_5447_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_5409_cast_fp16)[name = string("op_5447")]; + tensor var_5452 = const()[name = string("op_5452"), val = tensor([1, 1, 1, 256])]; + tensor var_5453 = reshape(shape = var_5452, x = var_5447)[name = string("op_5453")]; + string var_5469_pad_type_0 = const()[name = string("op_5469_pad_type_0"), val = string("valid")]; + tensor var_5469_strides_0 = const()[name = string("op_5469_strides_0"), val = tensor([1, 1])]; + tensor var_5469_pad_0 = const()[name = string("op_5469_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5469_dilations_0 = const()[name = string("op_5469_dilations_0"), val = tensor([1, 1])]; + int32 var_5469_groups_0 = const()[name = string("op_5469_groups_0"), val = int32(1)]; + tensor var_5469 = conv(dilations = var_5469_dilations_0, groups = var_5469_groups_0, pad = var_5469_pad_0, pad_type = var_5469_pad_type_0, strides = var_5469_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_5409_cast_fp16)[name = string("op_5469")]; + tensor var_5474 = const()[name = string("op_5474"), val = tensor([1, 1, 1, 256])]; + tensor var_5475 = reshape(shape = var_5474, x = var_5469)[name = string("op_5475")]; + int32 var_5490 = const()[name = string("op_5490"), val = int32(-1)]; + fp16 const_232_promoted = const()[name = string("const_232_promoted"), val = fp16(-0x1p+0)]; + tensor var_5492 = mul(x = var_5431, y = const_232_promoted)[name = string("op_5492")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_5490, interleave = input_125_interleave_0, values = (var_5431, var_5492))[name = string("input_125")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_5487_to_fp16 = const()[name = string("op_5487_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_5487_to_fp16, x = input_125)[name = string("normed_149_cast_fp16")]; + tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; + tensor var_5506_to_fp16 = const()[name = string("op_5506_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504084224)))]; + tensor q_13_cast_fp16 = mul(x = normed_151, y = var_5506_to_fp16)[name = string("q_13_cast_fp16")]; + int32 var_5517 = const()[name = string("op_5517"), val = int32(-1)]; + fp16 const_236_promoted = const()[name = string("const_236_promoted"), val = fp16(-0x1p+0)]; + tensor var_5519 = mul(x = var_5453, y = const_236_promoted)[name = string("op_5519")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127 = concat(axis = var_5517, interleave = input_127_interleave_0, values = (var_5453, var_5519))[name = string("input_127")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_5514_to_fp16 = const()[name = string("op_5514_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_5514_to_fp16, x = input_127)[name = string("normed_153_cast_fp16")]; + tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; + tensor var_5533_to_fp16 = const()[name = string("op_5533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504084800)))]; + tensor k_13_cast_fp16 = mul(x = normed_155, y = var_5533_to_fp16)[name = string("k_13_cast_fp16")]; + tensor var_5535_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5535_cast_fp16")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; + fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5556_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_242_promoted_to_fp16)[name = string("op_5556_cast_fp16")]; + int32 var_5558 = const()[name = string("op_5558"), val = int32(-1)]; + bool var_5559_interleave_0 = const()[name = string("op_5559_interleave_0"), val = bool(false)]; + tensor var_5559_cast_fp16 = concat(axis = var_5558, interleave = var_5559_interleave_0, values = (var_5556_cast_fp16, x1_25_cast_fp16))[name = string("op_5559_cast_fp16")]; + tensor var_5560_cast_fp16 = mul(x = var_5559_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5560_cast_fp16")]; + tensor query_states_25_cast_fp16 = add(x = var_5535_cast_fp16, y = var_5560_cast_fp16)[name = string("query_states_25_cast_fp16")]; + tensor var_5563_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5563_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; + fp16 const_245_promoted_to_fp16 = const()[name = string("const_245_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5584_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_245_promoted_to_fp16)[name = string("op_5584_cast_fp16")]; + int32 var_5586 = const()[name = string("op_5586"), val = int32(-1)]; + bool var_5587_interleave_0 = const()[name = string("op_5587_interleave_0"), val = bool(false)]; + tensor var_5587_cast_fp16 = concat(axis = var_5586, interleave = var_5587_interleave_0, values = (var_5584_cast_fp16, x1_27_cast_fp16))[name = string("op_5587_cast_fp16")]; + tensor var_5588_cast_fp16 = mul(x = var_5587_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5588_cast_fp16")]; + tensor key_states_25_cast_fp16 = add(x = var_5563_cast_fp16, y = var_5588_cast_fp16)[name = string("key_states_25_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([5])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([6])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_1955, concat_51_values3_0))[name = string("concat_51")]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_11_stride_0, update = key_states_25_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_12")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([27])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([28])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; + tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; + tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_1955, concat_55_values3_0))[name = string("concat_55")]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_12_stride_0, update = var_5475, x = coreml_update_state_64)[name = string("model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_13")]; + tensor var_5643_begin_0 = const()[name = string("op_5643_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_5643_end_0 = const()[name = string("op_5643_end_0"), val = tensor([6, 1, 512, 256])]; + tensor var_5643_end_mask_0 = const()[name = string("op_5643_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5643_cast_fp16 = slice_by_index(begin = var_5643_begin_0, end = var_5643_end_0, end_mask = var_5643_end_mask_0, x = coreml_update_state_65)[name = string("op_5643_cast_fp16")]; + tensor var_5650_begin_0 = const()[name = string("op_5650_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor var_5650_end_0 = const()[name = string("op_5650_end_0"), val = tensor([28, 1, 512, 256])]; + tensor var_5650_end_mask_0 = const()[name = string("op_5650_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5650_cast_fp16 = slice_by_index(begin = var_5650_begin_0, end = var_5650_end_0, end_mask = var_5650_end_mask_0, x = coreml_update_state_65)[name = string("op_5650_cast_fp16")]; + tensor var_5687 = const()[name = string("op_5687"), val = tensor([1, 4, 1, 1])]; + tensor x_101_cast_fp16 = tile(reps = var_5687, x = var_5643_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_5707 = const()[name = string("op_5707"), val = tensor([1, 4, 1, 1])]; + tensor x_107_cast_fp16 = tile(reps = var_5707, x = var_5650_cast_fp16)[name = string("x_107_cast_fp16")]; + bool var_5734_transpose_x_1 = const()[name = string("op_5734_transpose_x_1"), val = bool(false)]; + bool var_5734_transpose_y_1 = const()[name = string("op_5734_transpose_y_1"), val = bool(true)]; + tensor var_5734 = matmul(transpose_x = var_5734_transpose_x_1, transpose_y = var_5734_transpose_y_1, x = query_states_25_cast_fp16, y = x_101_cast_fp16)[name = string("op_5734")]; + fp16 var_5735_to_fp16 = const()[name = string("op_5735_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_37_cast_fp16 = mul(x = var_5734, y = var_5735_to_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = var_2129)[name = string("attn_weights_39_cast_fp16")]; + int32 var_5770 = const()[name = string("op_5770"), val = int32(-1)]; + tensor attn_weights_41_cast_fp16 = softmax(axis = var_5770, x = attn_weights_39_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_41_cast_fp16, y = x_107_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_5781_perm_0 = const()[name = string("op_5781_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5785 = const()[name = string("op_5785"), val = tensor([1, 1, 1024])]; + tensor var_5781_cast_fp16 = transpose(perm = var_5781_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_135")]; + tensor attn_output_65_cast_fp16 = reshape(shape = var_5785, x = var_5781_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_5790 = const()[name = string("op_5790"), val = tensor([0, 2, 1])]; + string var_5806_pad_type_0 = const()[name = string("op_5806_pad_type_0"), val = string("valid")]; + int32 var_5806_groups_0 = const()[name = string("op_5806_groups_0"), val = int32(1)]; + tensor var_5806_strides_0 = const()[name = string("op_5806_strides_0"), val = tensor([1])]; + tensor var_5806_pad_0 = const()[name = string("op_5806_pad_0"), val = tensor([0, 0])]; + tensor var_5806_dilations_0 = const()[name = string("op_5806_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504085376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504970176))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5791_cast_fp16 = transpose(perm = var_5790, x = attn_output_65_cast_fp16)[name = string("transpose_134")]; + tensor var_5806_cast_fp16 = conv(dilations = var_5806_dilations_0, groups = var_5806_groups_0, pad = var_5806_pad_0, pad_type = var_5806_pad_type_0, strides = var_5806_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_5791_cast_fp16)[name = string("op_5806_cast_fp16")]; + tensor var_5810 = const()[name = string("op_5810"), val = tensor([0, 2, 1])]; + int32 var_5821 = const()[name = string("op_5821"), val = int32(-1)]; + fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_109_cast_fp16 = transpose(perm = var_5810, x = var_5806_cast_fp16)[name = string("transpose_133")]; + tensor var_5823_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_5823_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_5821, interleave = input_131_interleave_0, values = (hidden_states_109_cast_fp16, var_5823_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_5818_to_fp16 = const()[name = string("op_5818_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_5818_to_fp16, x = input_131_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; + tensor var_5837_to_fp16 = const()[name = string("op_5837_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505007104)))]; + tensor attn_output_69_cast_fp16 = mul(x = normed_159_cast_fp16, y = var_5837_to_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + int32 var_5850 = const()[name = string("op_5850"), val = int32(-1)]; + fp16 const_258_promoted_to_fp16 = const()[name = string("const_258_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5852_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_258_promoted_to_fp16)[name = string("op_5852_cast_fp16")]; + bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; + tensor input_133_cast_fp16 = concat(axis = var_5850, interleave = input_133_interleave_0, values = (hidden_states_111_cast_fp16, var_5852_cast_fp16))[name = string("input_133_cast_fp16")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_5847_to_fp16 = const()[name = string("op_5847_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_5847_to_fp16, x = input_133_cast_fp16)[name = string("normed_161_cast_fp16")]; + tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; + tensor var_5866_to_fp16 = const()[name = string("op_5866_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505009472)))]; + tensor x_109_cast_fp16 = mul(x = normed_163_cast_fp16, y = var_5866_to_fp16)[name = string("x_109_cast_fp16")]; + tensor var_5878 = const()[name = string("op_5878"), val = tensor([0, 2, 1])]; + tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; + tensor var_5879_cast_fp16 = transpose(perm = var_5878, x = x_109_cast_fp16)[name = string("transpose_132")]; + tensor input_135_cast_fp16 = expand_dims(axes = input_135_axes_0, x = var_5879_cast_fp16)[name = string("input_135_cast_fp16")]; + string x_111_pad_type_0 = const()[name = string("x_111_pad_type_0"), val = string("valid")]; + tensor x_111_strides_0 = const()[name = string("x_111_strides_0"), val = tensor([1, 1])]; + tensor x_111_pad_0 = const()[name = string("x_111_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_111_dilations_0 = const()[name = string("x_111_dilations_0"), val = tensor([1, 1])]; + int32 x_111_groups_0 = const()[name = string("x_111_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505011840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510983872))))[name = string("model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_111_cast_fp16 = conv(dilations = x_111_dilations_0, groups = x_111_groups_0, pad = x_111_pad_0, pad_type = x_111_pad_type_0, strides = x_111_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("x_111_cast_fp16")]; + string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; + tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; + tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; + int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511205120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517177152))))[name = string("model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_13_cast_fp16 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("b_13_cast_fp16")]; + string var_5904_mode_0 = const()[name = string("op_5904_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_5904_cast_fp16 = gelu(mode = var_5904_mode_0, x = x_111_cast_fp16)[name = string("op_5904_cast_fp16")]; + tensor input_137_cast_fp16 = mul(x = var_5904_cast_fp16, y = b_13_cast_fp16)[name = string("input_137_cast_fp16")]; + string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; + tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; + tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; + int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523370432))))[name = string("model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_13_cast_fp16 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_137_cast_fp16)[name = string("e_13_cast_fp16")]; + tensor var_5912_axes_0 = const()[name = string("op_5912_axes_0"), val = tensor([2])]; + tensor var_5912_cast_fp16 = squeeze(axes = var_5912_axes_0, x = e_13_cast_fp16)[name = string("op_5912_cast_fp16")]; + tensor var_5913 = const()[name = string("op_5913"), val = tensor([0, 2, 1])]; + int32 var_5924 = const()[name = string("op_5924"), val = int32(-1)]; + fp16 const_262_promoted_to_fp16 = const()[name = string("const_262_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_113_cast_fp16 = transpose(perm = var_5913, x = var_5912_cast_fp16)[name = string("transpose_131")]; + tensor var_5926_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = const_262_promoted_to_fp16)[name = string("op_5926_cast_fp16")]; + bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; + tensor input_139_cast_fp16 = concat(axis = var_5924, interleave = input_139_interleave_0, values = (hidden_states_113_cast_fp16, var_5926_cast_fp16))[name = string("input_139_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_5921_to_fp16 = const()[name = string("op_5921_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_5921_to_fp16, x = input_139_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_167_cast_fp16 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167_cast_fp16")]; + tensor var_5940_to_fp16 = const()[name = string("op_5940_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523407360)))]; + tensor hidden_states_115_cast_fp16 = mul(x = normed_167_cast_fp16, y = var_5940_to_fp16)[name = string("hidden_states_115_cast_fp16")]; + tensor hidden_states_117_cast_fp16 = add(x = hidden_states_111_cast_fp16, y = hidden_states_115_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; + int32 var_5991 = const()[name = string("op_5991"), val = int32(-1)]; + fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5993_cast_fp16 = mul(x = hidden_states_117_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_5993_cast_fp16")]; + bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; + tensor input_141_cast_fp16 = concat(axis = var_5991, interleave = input_141_interleave_0, values = (hidden_states_117_cast_fp16, var_5993_cast_fp16))[name = string("input_141_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_5988_to_fp16 = const()[name = string("op_5988_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_5988_to_fp16, x = input_141_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_171_cast_fp16 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171_cast_fp16")]; + tensor var_6007_to_fp16 = const()[name = string("op_6007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523409728)))]; + tensor hidden_states_119_cast_fp16 = mul(x = normed_171_cast_fp16, y = var_6007_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor var_6012 = const()[name = string("op_6012"), val = tensor([0, 2, 1])]; + tensor var_6015_axes_0 = const()[name = string("op_6015_axes_0"), val = tensor([2])]; + tensor var_6013_cast_fp16 = transpose(perm = var_6012, x = hidden_states_119_cast_fp16)[name = string("transpose_130")]; + tensor var_6015_cast_fp16 = expand_dims(axes = var_6015_axes_0, x = var_6013_cast_fp16)[name = string("op_6015_cast_fp16")]; + string var_6031_pad_type_0 = const()[name = string("op_6031_pad_type_0"), val = string("valid")]; + tensor var_6031_strides_0 = const()[name = string("op_6031_strides_0"), val = tensor([1, 1])]; + tensor var_6031_pad_0 = const()[name = string("op_6031_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6031_dilations_0 = const()[name = string("op_6031_dilations_0"), val = tensor([1, 1])]; + int32 var_6031_groups_0 = const()[name = string("op_6031_groups_0"), val = int32(1)]; + tensor var_6031 = conv(dilations = var_6031_dilations_0, groups = var_6031_groups_0, pad = var_6031_pad_0, pad_type = var_6031_pad_type_0, strides = var_6031_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_6015_cast_fp16)[name = string("op_6031")]; + tensor var_6036 = const()[name = string("op_6036"), val = tensor([1, 4, 1, 256])]; + tensor var_6037 = reshape(shape = var_6036, x = var_6031)[name = string("op_6037")]; + string var_6053_pad_type_0 = const()[name = string("op_6053_pad_type_0"), val = string("valid")]; + tensor var_6053_strides_0 = const()[name = string("op_6053_strides_0"), val = tensor([1, 1])]; + tensor var_6053_pad_0 = const()[name = string("op_6053_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6053_dilations_0 = const()[name = string("op_6053_dilations_0"), val = tensor([1, 1])]; + int32 var_6053_groups_0 = const()[name = string("op_6053_groups_0"), val = int32(1)]; + tensor var_6053 = conv(dilations = var_6053_dilations_0, groups = var_6053_groups_0, pad = var_6053_pad_0, pad_type = var_6053_pad_type_0, strides = var_6053_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_6015_cast_fp16)[name = string("op_6053")]; + tensor var_6058 = const()[name = string("op_6058"), val = tensor([1, 1, 1, 256])]; + tensor var_6059 = reshape(shape = var_6058, x = var_6053)[name = string("op_6059")]; + string var_6075_pad_type_0 = const()[name = string("op_6075_pad_type_0"), val = string("valid")]; + tensor var_6075_strides_0 = const()[name = string("op_6075_strides_0"), val = tensor([1, 1])]; + tensor var_6075_pad_0 = const()[name = string("op_6075_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6075_dilations_0 = const()[name = string("op_6075_dilations_0"), val = tensor([1, 1])]; + int32 var_6075_groups_0 = const()[name = string("op_6075_groups_0"), val = int32(1)]; + tensor var_6075 = conv(dilations = var_6075_dilations_0, groups = var_6075_groups_0, pad = var_6075_pad_0, pad_type = var_6075_pad_type_0, strides = var_6075_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_6015_cast_fp16)[name = string("op_6075")]; + tensor var_6080 = const()[name = string("op_6080"), val = tensor([1, 1, 1, 256])]; + tensor var_6081 = reshape(shape = var_6080, x = var_6075)[name = string("op_6081")]; + int32 var_6096 = const()[name = string("op_6096"), val = int32(-1)]; + fp16 const_270_promoted = const()[name = string("const_270_promoted"), val = fp16(-0x1p+0)]; + tensor var_6098 = mul(x = var_6037, y = const_270_promoted)[name = string("op_6098")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145 = concat(axis = var_6096, interleave = input_145_interleave_0, values = (var_6037, var_6098))[name = string("input_145")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_6093_to_fp16 = const()[name = string("op_6093_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6093_to_fp16, x = input_145)[name = string("normed_173_cast_fp16")]; + tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_175 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175")]; + tensor var_6112_to_fp16 = const()[name = string("op_6112_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523412096)))]; + tensor q_15_cast_fp16 = mul(x = normed_175, y = var_6112_to_fp16)[name = string("q_15_cast_fp16")]; + int32 var_6123 = const()[name = string("op_6123"), val = int32(-1)]; + fp16 const_274_promoted = const()[name = string("const_274_promoted"), val = fp16(-0x1p+0)]; + tensor var_6125 = mul(x = var_6059, y = const_274_promoted)[name = string("op_6125")]; + bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; + tensor input_147 = concat(axis = var_6123, interleave = input_147_interleave_0, values = (var_6059, var_6125))[name = string("input_147")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_6120_to_fp16 = const()[name = string("op_6120_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_6120_to_fp16, x = input_147)[name = string("normed_177_cast_fp16")]; + tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_179 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179")]; + tensor var_6139_to_fp16 = const()[name = string("op_6139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523412672)))]; + tensor k_15_cast_fp16 = mul(x = normed_179, y = var_6139_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_6141_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6141_cast_fp16")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; + fp16 const_280_promoted_to_fp16 = const()[name = string("const_280_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6162_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_280_promoted_to_fp16)[name = string("op_6162_cast_fp16")]; + int32 var_6164 = const()[name = string("op_6164"), val = int32(-1)]; + bool var_6165_interleave_0 = const()[name = string("op_6165_interleave_0"), val = bool(false)]; + tensor var_6165_cast_fp16 = concat(axis = var_6164, interleave = var_6165_interleave_0, values = (var_6162_cast_fp16, x1_29_cast_fp16))[name = string("op_6165_cast_fp16")]; + tensor var_6166_cast_fp16 = mul(x = var_6165_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6166_cast_fp16")]; + tensor query_states_29_cast_fp16 = add(x = var_6141_cast_fp16, y = var_6166_cast_fp16)[name = string("query_states_29_cast_fp16")]; + tensor var_6169_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6169_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; + fp16 const_283_promoted_to_fp16 = const()[name = string("const_283_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6190_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_283_promoted_to_fp16)[name = string("op_6190_cast_fp16")]; + int32 var_6192 = const()[name = string("op_6192"), val = int32(-1)]; + bool var_6193_interleave_0 = const()[name = string("op_6193_interleave_0"), val = bool(false)]; + tensor var_6193_cast_fp16 = concat(axis = var_6192, interleave = var_6193_interleave_0, values = (var_6190_cast_fp16, x1_31_cast_fp16))[name = string("op_6193_cast_fp16")]; + tensor var_6194_cast_fp16 = mul(x = var_6193_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6194_cast_fp16")]; + tensor key_states_29_cast_fp16 = add(x = var_6169_cast_fp16, y = var_6194_cast_fp16)[name = string("key_states_29_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([6])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([7])]; + int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; + bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; + tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; + tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; + tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_1955, concat_59_values3_0))[name = string("concat_59")]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_13_stride_0, update = key_states_29_cast_fp16, x = coreml_update_state_65)[name = string("model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_14")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([28])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([29])]; + int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; + bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; + tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_1955, concat_63_values3_0))[name = string("concat_63")]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_14_stride_0, update = var_6081, x = coreml_update_state_66)[name = string("model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_15")]; + tensor var_6249_begin_0 = const()[name = string("op_6249_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_6249_end_0 = const()[name = string("op_6249_end_0"), val = tensor([7, 1, 512, 256])]; + tensor var_6249_end_mask_0 = const()[name = string("op_6249_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6249_cast_fp16 = slice_by_index(begin = var_6249_begin_0, end = var_6249_end_0, end_mask = var_6249_end_mask_0, x = coreml_update_state_67)[name = string("op_6249_cast_fp16")]; + tensor var_6256_begin_0 = const()[name = string("op_6256_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_6256_end_0 = const()[name = string("op_6256_end_0"), val = tensor([29, 1, 512, 256])]; + tensor var_6256_end_mask_0 = const()[name = string("op_6256_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6256_cast_fp16 = slice_by_index(begin = var_6256_begin_0, end = var_6256_end_0, end_mask = var_6256_end_mask_0, x = coreml_update_state_67)[name = string("op_6256_cast_fp16")]; + tensor var_6293 = const()[name = string("op_6293"), val = tensor([1, 4, 1, 1])]; + tensor x_117_cast_fp16 = tile(reps = var_6293, x = var_6249_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_6313 = const()[name = string("op_6313"), val = tensor([1, 4, 1, 1])]; + tensor x_123_cast_fp16 = tile(reps = var_6313, x = var_6256_cast_fp16)[name = string("x_123_cast_fp16")]; + bool var_6340_transpose_x_1 = const()[name = string("op_6340_transpose_x_1"), val = bool(false)]; + bool var_6340_transpose_y_1 = const()[name = string("op_6340_transpose_y_1"), val = bool(true)]; + tensor var_6340 = matmul(transpose_x = var_6340_transpose_x_1, transpose_y = var_6340_transpose_y_1, x = query_states_29_cast_fp16, y = x_117_cast_fp16)[name = string("op_6340")]; + fp16 var_6341_to_fp16 = const()[name = string("op_6341_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_43_cast_fp16 = mul(x = var_6340, y = var_6341_to_fp16)[name = string("attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = var_2129)[name = string("attn_weights_45_cast_fp16")]; + int32 var_6376 = const()[name = string("op_6376"), val = int32(-1)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_6376, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_71_transpose_x_0 = const()[name = string("attn_output_71_transpose_x_0"), val = bool(false)]; + bool attn_output_71_transpose_y_0 = const()[name = string("attn_output_71_transpose_y_0"), val = bool(false)]; + tensor attn_output_71_cast_fp16 = matmul(transpose_x = attn_output_71_transpose_x_0, transpose_y = attn_output_71_transpose_y_0, x = attn_weights_47_cast_fp16, y = x_123_cast_fp16)[name = string("attn_output_71_cast_fp16")]; + tensor var_6387_perm_0 = const()[name = string("op_6387_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6391 = const()[name = string("op_6391"), val = tensor([1, 1, 1024])]; + tensor var_6387_cast_fp16 = transpose(perm = var_6387_perm_0, x = attn_output_71_cast_fp16)[name = string("transpose_129")]; + tensor attn_output_75_cast_fp16 = reshape(shape = var_6391, x = var_6387_cast_fp16)[name = string("attn_output_75_cast_fp16")]; + tensor var_6396 = const()[name = string("op_6396"), val = tensor([0, 2, 1])]; + string var_6412_pad_type_0 = const()[name = string("op_6412_pad_type_0"), val = string("valid")]; + int32 var_6412_groups_0 = const()[name = string("op_6412_groups_0"), val = int32(1)]; + tensor var_6412_strides_0 = const()[name = string("op_6412_strides_0"), val = tensor([1])]; + tensor var_6412_pad_0 = const()[name = string("op_6412_pad_0"), val = tensor([0, 0])]; + tensor var_6412_dilations_0 = const()[name = string("op_6412_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523413248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524298048))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6397_cast_fp16 = transpose(perm = var_6396, x = attn_output_75_cast_fp16)[name = string("transpose_128")]; + tensor var_6412_cast_fp16 = conv(dilations = var_6412_dilations_0, groups = var_6412_groups_0, pad = var_6412_pad_0, pad_type = var_6412_pad_type_0, strides = var_6412_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_6397_cast_fp16)[name = string("op_6412_cast_fp16")]; + tensor var_6416 = const()[name = string("op_6416"), val = tensor([0, 2, 1])]; + int32 var_6427 = const()[name = string("op_6427"), val = int32(-1)]; + fp16 const_292_promoted_to_fp16 = const()[name = string("const_292_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_125_cast_fp16 = transpose(perm = var_6416, x = var_6412_cast_fp16)[name = string("transpose_127")]; + tensor var_6429_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = const_292_promoted_to_fp16)[name = string("op_6429_cast_fp16")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151_cast_fp16 = concat(axis = var_6427, interleave = input_151_interleave_0, values = (hidden_states_125_cast_fp16, var_6429_cast_fp16))[name = string("input_151_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_6424_to_fp16 = const()[name = string("op_6424_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_6424_to_fp16, x = input_151_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_183_cast_fp16 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183_cast_fp16")]; + tensor var_6443_to_fp16 = const()[name = string("op_6443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524334976)))]; + tensor attn_output_79_cast_fp16 = mul(x = normed_183_cast_fp16, y = var_6443_to_fp16)[name = string("attn_output_79_cast_fp16")]; + tensor hidden_states_127_cast_fp16 = add(x = hidden_states_117_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + int32 var_6456 = const()[name = string("op_6456"), val = int32(-1)]; + fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6458_cast_fp16 = mul(x = hidden_states_127_cast_fp16, y = const_296_promoted_to_fp16)[name = string("op_6458_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_6456, interleave = input_153_interleave_0, values = (hidden_states_127_cast_fp16, var_6458_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_6453_to_fp16 = const()[name = string("op_6453_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_6453_to_fp16, x = input_153_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_187_cast_fp16 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187_cast_fp16")]; + tensor var_6472_to_fp16 = const()[name = string("op_6472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524337344)))]; + tensor x_125_cast_fp16 = mul(x = normed_187_cast_fp16, y = var_6472_to_fp16)[name = string("x_125_cast_fp16")]; + tensor var_6484 = const()[name = string("op_6484"), val = tensor([0, 2, 1])]; + tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; + tensor var_6485_cast_fp16 = transpose(perm = var_6484, x = x_125_cast_fp16)[name = string("transpose_126")]; + tensor input_155_cast_fp16 = expand_dims(axes = input_155_axes_0, x = var_6485_cast_fp16)[name = string("input_155_cast_fp16")]; + string x_127_pad_type_0 = const()[name = string("x_127_pad_type_0"), val = string("valid")]; + tensor x_127_strides_0 = const()[name = string("x_127_strides_0"), val = tensor([1, 1])]; + tensor x_127_pad_0 = const()[name = string("x_127_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_127_dilations_0 = const()[name = string("x_127_dilations_0"), val = tensor([1, 1])]; + int32 x_127_groups_0 = const()[name = string("x_127_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524339712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530311744))))[name = string("model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_127_cast_fp16 = conv(dilations = x_127_dilations_0, groups = x_127_groups_0, pad = x_127_pad_0, pad_type = x_127_pad_type_0, strides = x_127_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("x_127_cast_fp16")]; + string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; + tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; + tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; + int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530532992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536505024))))[name = string("model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_15_cast_fp16 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("b_15_cast_fp16")]; + string var_6510_mode_0 = const()[name = string("op_6510_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_6510_cast_fp16 = gelu(mode = var_6510_mode_0, x = x_127_cast_fp16)[name = string("op_6510_cast_fp16")]; + tensor input_157_cast_fp16 = mul(x = var_6510_cast_fp16, y = b_15_cast_fp16)[name = string("input_157_cast_fp16")]; + string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; + tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; + tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; + int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536726272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542698304))))[name = string("model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_15_cast_fp16 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_157_cast_fp16)[name = string("e_15_cast_fp16")]; + tensor var_6518_axes_0 = const()[name = string("op_6518_axes_0"), val = tensor([2])]; + tensor var_6518_cast_fp16 = squeeze(axes = var_6518_axes_0, x = e_15_cast_fp16)[name = string("op_6518_cast_fp16")]; + tensor var_6519 = const()[name = string("op_6519"), val = tensor([0, 2, 1])]; + int32 var_6530 = const()[name = string("op_6530"), val = int32(-1)]; + fp16 const_300_promoted_to_fp16 = const()[name = string("const_300_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_129_cast_fp16 = transpose(perm = var_6519, x = var_6518_cast_fp16)[name = string("transpose_125")]; + tensor var_6532_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_300_promoted_to_fp16)[name = string("op_6532_cast_fp16")]; + bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; + tensor input_159_cast_fp16 = concat(axis = var_6530, interleave = input_159_interleave_0, values = (hidden_states_129_cast_fp16, var_6532_cast_fp16))[name = string("input_159_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_6527_to_fp16 = const()[name = string("op_6527_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_6527_to_fp16, x = input_159_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; + tensor var_6546_to_fp16 = const()[name = string("op_6546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542735232)))]; + tensor hidden_states_131_cast_fp16 = mul(x = normed_191_cast_fp16, y = var_6546_to_fp16)[name = string("hidden_states_131_cast_fp16")]; + tensor hidden_states_133_cast_fp16 = add(x = hidden_states_127_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; + int32 var_6597 = const()[name = string("op_6597"), val = int32(-1)]; + fp16 const_304_promoted_to_fp16 = const()[name = string("const_304_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6599_cast_fp16 = mul(x = hidden_states_133_cast_fp16, y = const_304_promoted_to_fp16)[name = string("op_6599_cast_fp16")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161_cast_fp16 = concat(axis = var_6597, interleave = input_161_interleave_0, values = (hidden_states_133_cast_fp16, var_6599_cast_fp16))[name = string("input_161_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_6594_to_fp16 = const()[name = string("op_6594_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_6594_to_fp16, x = input_161_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; + tensor var_6613_to_fp16 = const()[name = string("op_6613_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542737600)))]; + tensor hidden_states_135_cast_fp16 = mul(x = normed_195_cast_fp16, y = var_6613_to_fp16)[name = string("hidden_states_135_cast_fp16")]; + tensor var_6618 = const()[name = string("op_6618"), val = tensor([0, 2, 1])]; + tensor var_6621_axes_0 = const()[name = string("op_6621_axes_0"), val = tensor([2])]; + tensor var_6619_cast_fp16 = transpose(perm = var_6618, x = hidden_states_135_cast_fp16)[name = string("transpose_124")]; + tensor var_6621_cast_fp16 = expand_dims(axes = var_6621_axes_0, x = var_6619_cast_fp16)[name = string("op_6621_cast_fp16")]; + string var_6637_pad_type_0 = const()[name = string("op_6637_pad_type_0"), val = string("valid")]; + tensor var_6637_strides_0 = const()[name = string("op_6637_strides_0"), val = tensor([1, 1])]; + tensor var_6637_pad_0 = const()[name = string("op_6637_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6637_dilations_0 = const()[name = string("op_6637_dilations_0"), val = tensor([1, 1])]; + int32 var_6637_groups_0 = const()[name = string("op_6637_groups_0"), val = int32(1)]; + tensor var_6637 = conv(dilations = var_6637_dilations_0, groups = var_6637_groups_0, pad = var_6637_pad_0, pad_type = var_6637_pad_type_0, strides = var_6637_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_6621_cast_fp16)[name = string("op_6637")]; + tensor var_6642 = const()[name = string("op_6642"), val = tensor([1, 4, 1, 256])]; + tensor var_6643 = reshape(shape = var_6642, x = var_6637)[name = string("op_6643")]; + string var_6659_pad_type_0 = const()[name = string("op_6659_pad_type_0"), val = string("valid")]; + tensor var_6659_strides_0 = const()[name = string("op_6659_strides_0"), val = tensor([1, 1])]; + tensor var_6659_pad_0 = const()[name = string("op_6659_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6659_dilations_0 = const()[name = string("op_6659_dilations_0"), val = tensor([1, 1])]; + int32 var_6659_groups_0 = const()[name = string("op_6659_groups_0"), val = int32(1)]; + tensor var_6659 = conv(dilations = var_6659_dilations_0, groups = var_6659_groups_0, pad = var_6659_pad_0, pad_type = var_6659_pad_type_0, strides = var_6659_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_6621_cast_fp16)[name = string("op_6659")]; + tensor var_6664 = const()[name = string("op_6664"), val = tensor([1, 1, 1, 256])]; + tensor var_6665 = reshape(shape = var_6664, x = var_6659)[name = string("op_6665")]; + string var_6681_pad_type_0 = const()[name = string("op_6681_pad_type_0"), val = string("valid")]; + tensor var_6681_strides_0 = const()[name = string("op_6681_strides_0"), val = tensor([1, 1])]; + tensor var_6681_pad_0 = const()[name = string("op_6681_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6681_dilations_0 = const()[name = string("op_6681_dilations_0"), val = tensor([1, 1])]; + int32 var_6681_groups_0 = const()[name = string("op_6681_groups_0"), val = int32(1)]; + tensor var_6681 = conv(dilations = var_6681_dilations_0, groups = var_6681_groups_0, pad = var_6681_pad_0, pad_type = var_6681_pad_type_0, strides = var_6681_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_6621_cast_fp16)[name = string("op_6681")]; + tensor var_6686 = const()[name = string("op_6686"), val = tensor([1, 1, 1, 256])]; + tensor var_6687 = reshape(shape = var_6686, x = var_6681)[name = string("op_6687")]; + int32 var_6702 = const()[name = string("op_6702"), val = int32(-1)]; + fp16 const_308_promoted = const()[name = string("const_308_promoted"), val = fp16(-0x1p+0)]; + tensor var_6704 = mul(x = var_6643, y = const_308_promoted)[name = string("op_6704")]; + bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; + tensor input_165 = concat(axis = var_6702, interleave = input_165_interleave_0, values = (var_6643, var_6704))[name = string("input_165")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_6699_to_fp16 = const()[name = string("op_6699_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_6699_to_fp16, x = input_165)[name = string("normed_197_cast_fp16")]; + tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; + tensor var_6718_to_fp16 = const()[name = string("op_6718_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542739968)))]; + tensor q_17_cast_fp16 = mul(x = normed_199, y = var_6718_to_fp16)[name = string("q_17_cast_fp16")]; + int32 var_6729 = const()[name = string("op_6729"), val = int32(-1)]; + fp16 const_312_promoted = const()[name = string("const_312_promoted"), val = fp16(-0x1p+0)]; + tensor var_6731 = mul(x = var_6665, y = const_312_promoted)[name = string("op_6731")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167 = concat(axis = var_6729, interleave = input_167_interleave_0, values = (var_6665, var_6731))[name = string("input_167")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_6726_to_fp16 = const()[name = string("op_6726_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_6726_to_fp16, x = input_167)[name = string("normed_201_cast_fp16")]; + tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; + tensor var_6745_to_fp16 = const()[name = string("op_6745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542740544)))]; + tensor k_17_cast_fp16 = mul(x = normed_203, y = var_6745_to_fp16)[name = string("k_17_cast_fp16")]; + tensor var_6747_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6747_cast_fp16")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; + fp16 const_318_promoted_to_fp16 = const()[name = string("const_318_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6768_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_318_promoted_to_fp16)[name = string("op_6768_cast_fp16")]; + int32 var_6770 = const()[name = string("op_6770"), val = int32(-1)]; + bool var_6771_interleave_0 = const()[name = string("op_6771_interleave_0"), val = bool(false)]; + tensor var_6771_cast_fp16 = concat(axis = var_6770, interleave = var_6771_interleave_0, values = (var_6768_cast_fp16, x1_33_cast_fp16))[name = string("op_6771_cast_fp16")]; + tensor var_6772_cast_fp16 = mul(x = var_6771_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6772_cast_fp16")]; + tensor query_states_33_cast_fp16 = add(x = var_6747_cast_fp16, y = var_6772_cast_fp16)[name = string("query_states_33_cast_fp16")]; + tensor var_6775_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6775_cast_fp16")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; + fp16 const_321_promoted_to_fp16 = const()[name = string("const_321_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6796_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_321_promoted_to_fp16)[name = string("op_6796_cast_fp16")]; + int32 var_6798 = const()[name = string("op_6798"), val = int32(-1)]; + bool var_6799_interleave_0 = const()[name = string("op_6799_interleave_0"), val = bool(false)]; + tensor var_6799_cast_fp16 = concat(axis = var_6798, interleave = var_6799_interleave_0, values = (var_6796_cast_fp16, x1_35_cast_fp16))[name = string("op_6799_cast_fp16")]; + tensor var_6800_cast_fp16 = mul(x = var_6799_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6800_cast_fp16")]; + tensor key_states_33_cast_fp16 = add(x = var_6775_cast_fp16, y = var_6800_cast_fp16)[name = string("key_states_33_cast_fp16")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([7])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([8])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; + tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; + tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; + int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; + bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; + tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_1955, concat_67_values3_0))[name = string("concat_67")]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_15_stride_0, update = key_states_33_cast_fp16, x = coreml_update_state_67)[name = string("model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_16")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([29])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([30])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_1955, concat_71_values3_0))[name = string("concat_71")]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_16_stride_0, update = var_6687, x = coreml_update_state_68)[name = string("model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_17")]; + tensor var_6855_begin_0 = const()[name = string("op_6855_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_6855_end_0 = const()[name = string("op_6855_end_0"), val = tensor([8, 1, 512, 256])]; + tensor var_6855_end_mask_0 = const()[name = string("op_6855_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6855_cast_fp16 = slice_by_index(begin = var_6855_begin_0, end = var_6855_end_0, end_mask = var_6855_end_mask_0, x = coreml_update_state_69)[name = string("op_6855_cast_fp16")]; + tensor var_6862_begin_0 = const()[name = string("op_6862_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_6862_end_0 = const()[name = string("op_6862_end_0"), val = tensor([30, 1, 512, 256])]; + tensor var_6862_end_mask_0 = const()[name = string("op_6862_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6862_cast_fp16 = slice_by_index(begin = var_6862_begin_0, end = var_6862_end_0, end_mask = var_6862_end_mask_0, x = coreml_update_state_69)[name = string("op_6862_cast_fp16")]; + tensor var_6899 = const()[name = string("op_6899"), val = tensor([1, 4, 1, 1])]; + tensor x_133_cast_fp16 = tile(reps = var_6899, x = var_6855_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_6919 = const()[name = string("op_6919"), val = tensor([1, 4, 1, 1])]; + tensor x_139_cast_fp16 = tile(reps = var_6919, x = var_6862_cast_fp16)[name = string("x_139_cast_fp16")]; + bool var_6946_transpose_x_1 = const()[name = string("op_6946_transpose_x_1"), val = bool(false)]; + bool var_6946_transpose_y_1 = const()[name = string("op_6946_transpose_y_1"), val = bool(true)]; + tensor var_6946 = matmul(transpose_x = var_6946_transpose_x_1, transpose_y = var_6946_transpose_y_1, x = query_states_33_cast_fp16, y = x_133_cast_fp16)[name = string("op_6946")]; + fp16 var_6947_to_fp16 = const()[name = string("op_6947_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_49_cast_fp16 = mul(x = var_6946, y = var_6947_to_fp16)[name = string("attn_weights_49_cast_fp16")]; + tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = var_2129)[name = string("attn_weights_51_cast_fp16")]; + int32 var_6982 = const()[name = string("op_6982"), val = int32(-1)]; + tensor attn_weights_53_cast_fp16 = softmax(axis = var_6982, x = attn_weights_51_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; + bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; + tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = attn_weights_53_cast_fp16, y = x_139_cast_fp16)[name = string("attn_output_81_cast_fp16")]; + tensor var_6993_perm_0 = const()[name = string("op_6993_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6997 = const()[name = string("op_6997"), val = tensor([1, 1, 1024])]; + tensor var_6993_cast_fp16 = transpose(perm = var_6993_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_123")]; + tensor attn_output_85_cast_fp16 = reshape(shape = var_6997, x = var_6993_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_7002 = const()[name = string("op_7002"), val = tensor([0, 2, 1])]; + string var_7018_pad_type_0 = const()[name = string("op_7018_pad_type_0"), val = string("valid")]; + int32 var_7018_groups_0 = const()[name = string("op_7018_groups_0"), val = int32(1)]; + tensor var_7018_strides_0 = const()[name = string("op_7018_strides_0"), val = tensor([1])]; + tensor var_7018_pad_0 = const()[name = string("op_7018_pad_0"), val = tensor([0, 0])]; + tensor var_7018_dilations_0 = const()[name = string("op_7018_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542741120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543625920))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7003_cast_fp16 = transpose(perm = var_7002, x = attn_output_85_cast_fp16)[name = string("transpose_122")]; + tensor var_7018_cast_fp16 = conv(dilations = var_7018_dilations_0, groups = var_7018_groups_0, pad = var_7018_pad_0, pad_type = var_7018_pad_type_0, strides = var_7018_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_7003_cast_fp16)[name = string("op_7018_cast_fp16")]; + tensor var_7022 = const()[name = string("op_7022"), val = tensor([0, 2, 1])]; + int32 var_7033 = const()[name = string("op_7033"), val = int32(-1)]; + fp16 const_330_promoted_to_fp16 = const()[name = string("const_330_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_141_cast_fp16 = transpose(perm = var_7022, x = var_7018_cast_fp16)[name = string("transpose_121")]; + tensor var_7035_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_330_promoted_to_fp16)[name = string("op_7035_cast_fp16")]; + bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; + tensor input_171_cast_fp16 = concat(axis = var_7033, interleave = input_171_interleave_0, values = (hidden_states_141_cast_fp16, var_7035_cast_fp16))[name = string("input_171_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_7030_to_fp16 = const()[name = string("op_7030_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_7030_to_fp16, x = input_171_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; + tensor var_7049_to_fp16 = const()[name = string("op_7049_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543662848)))]; + tensor attn_output_89_cast_fp16 = mul(x = normed_207_cast_fp16, y = var_7049_to_fp16)[name = string("attn_output_89_cast_fp16")]; + tensor hidden_states_143_cast_fp16 = add(x = hidden_states_133_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; + int32 var_7062 = const()[name = string("op_7062"), val = int32(-1)]; + fp16 const_334_promoted_to_fp16 = const()[name = string("const_334_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7064_cast_fp16 = mul(x = hidden_states_143_cast_fp16, y = const_334_promoted_to_fp16)[name = string("op_7064_cast_fp16")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173_cast_fp16 = concat(axis = var_7062, interleave = input_173_interleave_0, values = (hidden_states_143_cast_fp16, var_7064_cast_fp16))[name = string("input_173_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_7059_to_fp16 = const()[name = string("op_7059_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_7059_to_fp16, x = input_173_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; + tensor var_7078_to_fp16 = const()[name = string("op_7078_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543665216)))]; + tensor x_141_cast_fp16 = mul(x = normed_211_cast_fp16, y = var_7078_to_fp16)[name = string("x_141_cast_fp16")]; + tensor var_7090 = const()[name = string("op_7090"), val = tensor([0, 2, 1])]; + tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; + tensor var_7091_cast_fp16 = transpose(perm = var_7090, x = x_141_cast_fp16)[name = string("transpose_120")]; + tensor input_175_cast_fp16 = expand_dims(axes = input_175_axes_0, x = var_7091_cast_fp16)[name = string("input_175_cast_fp16")]; + string x_143_pad_type_0 = const()[name = string("x_143_pad_type_0"), val = string("valid")]; + tensor x_143_strides_0 = const()[name = string("x_143_strides_0"), val = tensor([1, 1])]; + tensor x_143_pad_0 = const()[name = string("x_143_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_143_dilations_0 = const()[name = string("x_143_dilations_0"), val = tensor([1, 1])]; + int32 x_143_groups_0 = const()[name = string("x_143_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543667584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549639616))))[name = string("model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("x_143_cast_fp16")]; + string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; + tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; + tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; + int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549860864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555832896))))[name = string("model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_17_cast_fp16 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("b_17_cast_fp16")]; + string var_7116_mode_0 = const()[name = string("op_7116_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_7116_cast_fp16 = gelu(mode = var_7116_mode_0, x = x_143_cast_fp16)[name = string("op_7116_cast_fp16")]; + tensor input_177_cast_fp16 = mul(x = var_7116_cast_fp16, y = b_17_cast_fp16)[name = string("input_177_cast_fp16")]; + string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; + tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; + tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; + int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556054144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562026176))))[name = string("model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_17_cast_fp16 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("e_17_cast_fp16")]; + tensor var_7124_axes_0 = const()[name = string("op_7124_axes_0"), val = tensor([2])]; + tensor var_7124_cast_fp16 = squeeze(axes = var_7124_axes_0, x = e_17_cast_fp16)[name = string("op_7124_cast_fp16")]; + tensor var_7125 = const()[name = string("op_7125"), val = tensor([0, 2, 1])]; + int32 var_7136 = const()[name = string("op_7136"), val = int32(-1)]; + fp16 const_338_promoted_to_fp16 = const()[name = string("const_338_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_145_cast_fp16 = transpose(perm = var_7125, x = var_7124_cast_fp16)[name = string("transpose_119")]; + tensor var_7138_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = const_338_promoted_to_fp16)[name = string("op_7138_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_7136, interleave = input_179_interleave_0, values = (hidden_states_145_cast_fp16, var_7138_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_7133_to_fp16 = const()[name = string("op_7133_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_7133_to_fp16, x = input_179_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_215_cast_fp16 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215_cast_fp16")]; + tensor var_7152_to_fp16 = const()[name = string("op_7152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562063104)))]; + tensor hidden_states_147_cast_fp16 = mul(x = normed_215_cast_fp16, y = var_7152_to_fp16)[name = string("hidden_states_147_cast_fp16")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_143_cast_fp16, y = hidden_states_147_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + int32 var_7203 = const()[name = string("op_7203"), val = int32(-1)]; + fp16 const_342_promoted_to_fp16 = const()[name = string("const_342_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7205_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = const_342_promoted_to_fp16)[name = string("op_7205_cast_fp16")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181_cast_fp16 = concat(axis = var_7203, interleave = input_181_interleave_0, values = (hidden_states_149_cast_fp16, var_7205_cast_fp16))[name = string("input_181_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_7200_to_fp16 = const()[name = string("op_7200_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_7200_to_fp16, x = input_181_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_219_cast_fp16 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219_cast_fp16")]; + tensor var_7219_to_fp16 = const()[name = string("op_7219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562065472)))]; + tensor hidden_states_151_cast_fp16 = mul(x = normed_219_cast_fp16, y = var_7219_to_fp16)[name = string("hidden_states_151_cast_fp16")]; + tensor var_7224 = const()[name = string("op_7224"), val = tensor([0, 2, 1])]; + tensor var_7227_axes_0 = const()[name = string("op_7227_axes_0"), val = tensor([2])]; + tensor var_7225_cast_fp16 = transpose(perm = var_7224, x = hidden_states_151_cast_fp16)[name = string("transpose_118")]; + tensor var_7227_cast_fp16 = expand_dims(axes = var_7227_axes_0, x = var_7225_cast_fp16)[name = string("op_7227_cast_fp16")]; + string var_7243_pad_type_0 = const()[name = string("op_7243_pad_type_0"), val = string("valid")]; + tensor var_7243_strides_0 = const()[name = string("op_7243_strides_0"), val = tensor([1, 1])]; + tensor var_7243_pad_0 = const()[name = string("op_7243_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7243_dilations_0 = const()[name = string("op_7243_dilations_0"), val = tensor([1, 1])]; + int32 var_7243_groups_0 = const()[name = string("op_7243_groups_0"), val = int32(1)]; + tensor var_7243 = conv(dilations = var_7243_dilations_0, groups = var_7243_groups_0, pad = var_7243_pad_0, pad_type = var_7243_pad_type_0, strides = var_7243_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_7227_cast_fp16)[name = string("op_7243")]; + tensor var_7248 = const()[name = string("op_7248"), val = tensor([1, 4, 1, 256])]; + tensor var_7249 = reshape(shape = var_7248, x = var_7243)[name = string("op_7249")]; + string var_7265_pad_type_0 = const()[name = string("op_7265_pad_type_0"), val = string("valid")]; + tensor var_7265_strides_0 = const()[name = string("op_7265_strides_0"), val = tensor([1, 1])]; + tensor var_7265_pad_0 = const()[name = string("op_7265_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7265_dilations_0 = const()[name = string("op_7265_dilations_0"), val = tensor([1, 1])]; + int32 var_7265_groups_0 = const()[name = string("op_7265_groups_0"), val = int32(1)]; + tensor var_7265 = conv(dilations = var_7265_dilations_0, groups = var_7265_groups_0, pad = var_7265_pad_0, pad_type = var_7265_pad_type_0, strides = var_7265_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_7227_cast_fp16)[name = string("op_7265")]; + tensor var_7270 = const()[name = string("op_7270"), val = tensor([1, 1, 1, 256])]; + tensor var_7271 = reshape(shape = var_7270, x = var_7265)[name = string("op_7271")]; + string var_7287_pad_type_0 = const()[name = string("op_7287_pad_type_0"), val = string("valid")]; + tensor var_7287_strides_0 = const()[name = string("op_7287_strides_0"), val = tensor([1, 1])]; + tensor var_7287_pad_0 = const()[name = string("op_7287_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7287_dilations_0 = const()[name = string("op_7287_dilations_0"), val = tensor([1, 1])]; + int32 var_7287_groups_0 = const()[name = string("op_7287_groups_0"), val = int32(1)]; + tensor var_7287 = conv(dilations = var_7287_dilations_0, groups = var_7287_groups_0, pad = var_7287_pad_0, pad_type = var_7287_pad_type_0, strides = var_7287_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_7227_cast_fp16)[name = string("op_7287")]; + tensor var_7292 = const()[name = string("op_7292"), val = tensor([1, 1, 1, 256])]; + tensor var_7293 = reshape(shape = var_7292, x = var_7287)[name = string("op_7293")]; + int32 var_7308 = const()[name = string("op_7308"), val = int32(-1)]; + fp16 const_346_promoted = const()[name = string("const_346_promoted"), val = fp16(-0x1p+0)]; + tensor var_7310 = mul(x = var_7249, y = const_346_promoted)[name = string("op_7310")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_7308, interleave = input_185_interleave_0, values = (var_7249, var_7310))[name = string("input_185")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_7305_to_fp16 = const()[name = string("op_7305_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_7305_to_fp16, x = input_185)[name = string("normed_221_cast_fp16")]; + tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_223 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223")]; + tensor var_7324_to_fp16 = const()[name = string("op_7324_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562067840)))]; + tensor q_19_cast_fp16 = mul(x = normed_223, y = var_7324_to_fp16)[name = string("q_19_cast_fp16")]; + int32 var_7335 = const()[name = string("op_7335"), val = int32(-1)]; + fp16 const_350_promoted = const()[name = string("const_350_promoted"), val = fp16(-0x1p+0)]; + tensor var_7337 = mul(x = var_7271, y = const_350_promoted)[name = string("op_7337")]; + bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; + tensor input_187 = concat(axis = var_7335, interleave = input_187_interleave_0, values = (var_7271, var_7337))[name = string("input_187")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_7332_to_fp16 = const()[name = string("op_7332_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_7332_to_fp16, x = input_187)[name = string("normed_225_cast_fp16")]; + tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_227 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227")]; + tensor var_7351_to_fp16 = const()[name = string("op_7351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562068416)))]; + tensor k_19_cast_fp16 = mul(x = normed_227, y = var_7351_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_7353_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7353_cast_fp16")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; + fp16 const_356_promoted_to_fp16 = const()[name = string("const_356_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7374_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_356_promoted_to_fp16)[name = string("op_7374_cast_fp16")]; + int32 var_7376 = const()[name = string("op_7376"), val = int32(-1)]; + bool var_7377_interleave_0 = const()[name = string("op_7377_interleave_0"), val = bool(false)]; + tensor var_7377_cast_fp16 = concat(axis = var_7376, interleave = var_7377_interleave_0, values = (var_7374_cast_fp16, x1_37_cast_fp16))[name = string("op_7377_cast_fp16")]; + tensor var_7378_cast_fp16 = mul(x = var_7377_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7378_cast_fp16")]; + tensor query_states_37_cast_fp16 = add(x = var_7353_cast_fp16, y = var_7378_cast_fp16)[name = string("query_states_37_cast_fp16")]; + tensor var_7381_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7381_cast_fp16")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; + fp16 const_359_promoted_to_fp16 = const()[name = string("const_359_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7402_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_359_promoted_to_fp16)[name = string("op_7402_cast_fp16")]; + int32 var_7404 = const()[name = string("op_7404"), val = int32(-1)]; + bool var_7405_interleave_0 = const()[name = string("op_7405_interleave_0"), val = bool(false)]; + tensor var_7405_cast_fp16 = concat(axis = var_7404, interleave = var_7405_interleave_0, values = (var_7402_cast_fp16, x1_39_cast_fp16))[name = string("op_7405_cast_fp16")]; + tensor var_7406_cast_fp16 = mul(x = var_7405_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7406_cast_fp16")]; + tensor key_states_37_cast_fp16 = add(x = var_7381_cast_fp16, y = var_7406_cast_fp16)[name = string("key_states_37_cast_fp16")]; + tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([8])]; + tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; + tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([9])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_112, concat_75_values1_0, var_1955, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_17_stride_0, update = key_states_37_cast_fp16, x = coreml_update_state_69)[name = string("model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_18")]; + tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([30])]; + tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; + tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; + tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([31])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_118, concat_79_values1_0, var_1955, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_18_stride_0, update = var_7293, x = coreml_update_state_70)[name = string("model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_19")]; + tensor var_7461_begin_0 = const()[name = string("op_7461_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_7461_end_0 = const()[name = string("op_7461_end_0"), val = tensor([9, 1, 512, 256])]; + tensor var_7461_end_mask_0 = const()[name = string("op_7461_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7461_cast_fp16 = slice_by_index(begin = var_7461_begin_0, end = var_7461_end_0, end_mask = var_7461_end_mask_0, x = coreml_update_state_71)[name = string("op_7461_cast_fp16")]; + tensor var_7468_begin_0 = const()[name = string("op_7468_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_7468_end_0 = const()[name = string("op_7468_end_0"), val = tensor([31, 1, 512, 256])]; + tensor var_7468_end_mask_0 = const()[name = string("op_7468_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7468_cast_fp16 = slice_by_index(begin = var_7468_begin_0, end = var_7468_end_0, end_mask = var_7468_end_mask_0, x = coreml_update_state_71)[name = string("op_7468_cast_fp16")]; + tensor var_7505 = const()[name = string("op_7505"), val = tensor([1, 4, 1, 1])]; + tensor x_149_cast_fp16 = tile(reps = var_7505, x = var_7461_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_7525 = const()[name = string("op_7525"), val = tensor([1, 4, 1, 1])]; + tensor x_155_cast_fp16 = tile(reps = var_7525, x = var_7468_cast_fp16)[name = string("x_155_cast_fp16")]; + bool var_7552_transpose_x_1 = const()[name = string("op_7552_transpose_x_1"), val = bool(false)]; + bool var_7552_transpose_y_1 = const()[name = string("op_7552_transpose_y_1"), val = bool(true)]; + tensor var_7552 = matmul(transpose_x = var_7552_transpose_x_1, transpose_y = var_7552_transpose_y_1, x = query_states_37_cast_fp16, y = x_149_cast_fp16)[name = string("op_7552")]; + fp16 var_7553_to_fp16 = const()[name = string("op_7553_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_55_cast_fp16 = mul(x = var_7552, y = var_7553_to_fp16)[name = string("attn_weights_55_cast_fp16")]; + tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = var_2129)[name = string("attn_weights_57_cast_fp16")]; + int32 var_7588 = const()[name = string("op_7588"), val = int32(-1)]; + tensor attn_weights_59_cast_fp16 = softmax(axis = var_7588, x = attn_weights_57_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; + bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; + bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; + tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_59_cast_fp16, y = x_155_cast_fp16)[name = string("attn_output_91_cast_fp16")]; + tensor var_7599_perm_0 = const()[name = string("op_7599_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7603 = const()[name = string("op_7603"), val = tensor([1, 1, 1024])]; + tensor var_7599_cast_fp16 = transpose(perm = var_7599_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_117")]; + tensor attn_output_95_cast_fp16 = reshape(shape = var_7603, x = var_7599_cast_fp16)[name = string("attn_output_95_cast_fp16")]; + tensor var_7608 = const()[name = string("op_7608"), val = tensor([0, 2, 1])]; + string var_7624_pad_type_0 = const()[name = string("op_7624_pad_type_0"), val = string("valid")]; + int32 var_7624_groups_0 = const()[name = string("op_7624_groups_0"), val = int32(1)]; + tensor var_7624_strides_0 = const()[name = string("op_7624_strides_0"), val = tensor([1])]; + tensor var_7624_pad_0 = const()[name = string("op_7624_pad_0"), val = tensor([0, 0])]; + tensor var_7624_dilations_0 = const()[name = string("op_7624_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562068992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562953792))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7609_cast_fp16 = transpose(perm = var_7608, x = attn_output_95_cast_fp16)[name = string("transpose_116")]; + tensor var_7624_cast_fp16 = conv(dilations = var_7624_dilations_0, groups = var_7624_groups_0, pad = var_7624_pad_0, pad_type = var_7624_pad_type_0, strides = var_7624_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_7609_cast_fp16)[name = string("op_7624_cast_fp16")]; + tensor var_7628 = const()[name = string("op_7628"), val = tensor([0, 2, 1])]; + int32 var_7639 = const()[name = string("op_7639"), val = int32(-1)]; + fp16 const_368_promoted_to_fp16 = const()[name = string("const_368_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_157_cast_fp16 = transpose(perm = var_7628, x = var_7624_cast_fp16)[name = string("transpose_115")]; + tensor var_7641_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = const_368_promoted_to_fp16)[name = string("op_7641_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_7639, interleave = input_191_interleave_0, values = (hidden_states_157_cast_fp16, var_7641_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_7636_to_fp16 = const()[name = string("op_7636_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_7636_to_fp16, x = input_191_cast_fp16)[name = string("normed_229_cast_fp16")]; + tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_231_cast_fp16 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231_cast_fp16")]; + tensor var_7655_to_fp16 = const()[name = string("op_7655_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562990720)))]; + tensor attn_output_99_cast_fp16 = mul(x = normed_231_cast_fp16, y = var_7655_to_fp16)[name = string("attn_output_99_cast_fp16")]; + tensor hidden_states_159_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; + int32 var_7668 = const()[name = string("op_7668"), val = int32(-1)]; + fp16 const_372_promoted_to_fp16 = const()[name = string("const_372_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7670_cast_fp16 = mul(x = hidden_states_159_cast_fp16, y = const_372_promoted_to_fp16)[name = string("op_7670_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_7668, interleave = input_193_interleave_0, values = (hidden_states_159_cast_fp16, var_7670_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_7665_to_fp16 = const()[name = string("op_7665_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_7665_to_fp16, x = input_193_cast_fp16)[name = string("normed_233_cast_fp16")]; + tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_235_cast_fp16 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235_cast_fp16")]; + tensor var_7684_to_fp16 = const()[name = string("op_7684_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562993088)))]; + tensor x_157_cast_fp16 = mul(x = normed_235_cast_fp16, y = var_7684_to_fp16)[name = string("x_157_cast_fp16")]; + tensor var_7696 = const()[name = string("op_7696"), val = tensor([0, 2, 1])]; + tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; + tensor var_7697_cast_fp16 = transpose(perm = var_7696, x = x_157_cast_fp16)[name = string("transpose_114")]; + tensor input_195_cast_fp16 = expand_dims(axes = input_195_axes_0, x = var_7697_cast_fp16)[name = string("input_195_cast_fp16")]; + string x_159_pad_type_0 = const()[name = string("x_159_pad_type_0"), val = string("valid")]; + tensor x_159_strides_0 = const()[name = string("x_159_strides_0"), val = tensor([1, 1])]; + tensor x_159_pad_0 = const()[name = string("x_159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_159_dilations_0 = const()[name = string("x_159_dilations_0"), val = tensor([1, 1])]; + int32 x_159_groups_0 = const()[name = string("x_159_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562995456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568967488))))[name = string("model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_159_cast_fp16 = conv(dilations = x_159_dilations_0, groups = x_159_groups_0, pad = x_159_pad_0, pad_type = x_159_pad_type_0, strides = x_159_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("x_159_cast_fp16")]; + string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; + tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; + tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; + int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569188736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575160768))))[name = string("model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_19_cast_fp16 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("b_19_cast_fp16")]; + string var_7722_mode_0 = const()[name = string("op_7722_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_7722_cast_fp16 = gelu(mode = var_7722_mode_0, x = x_159_cast_fp16)[name = string("op_7722_cast_fp16")]; + tensor input_197_cast_fp16 = mul(x = var_7722_cast_fp16, y = b_19_cast_fp16)[name = string("input_197_cast_fp16")]; + string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; + tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; + tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; + int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575382016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581354048))))[name = string("model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_19_cast_fp16 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_197_cast_fp16)[name = string("e_19_cast_fp16")]; + tensor var_7730_axes_0 = const()[name = string("op_7730_axes_0"), val = tensor([2])]; + tensor var_7730_cast_fp16 = squeeze(axes = var_7730_axes_0, x = e_19_cast_fp16)[name = string("op_7730_cast_fp16")]; + tensor var_7731 = const()[name = string("op_7731"), val = tensor([0, 2, 1])]; + int32 var_7742 = const()[name = string("op_7742"), val = int32(-1)]; + fp16 const_376_promoted_to_fp16 = const()[name = string("const_376_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_161_cast_fp16 = transpose(perm = var_7731, x = var_7730_cast_fp16)[name = string("transpose_113")]; + tensor var_7744_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_376_promoted_to_fp16)[name = string("op_7744_cast_fp16")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199_cast_fp16 = concat(axis = var_7742, interleave = input_199_interleave_0, values = (hidden_states_161_cast_fp16, var_7744_cast_fp16))[name = string("input_199_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_7739_to_fp16 = const()[name = string("op_7739_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_7739_to_fp16, x = input_199_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; + tensor var_7758_to_fp16 = const()[name = string("op_7758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581390976)))]; + tensor hidden_states_163_cast_fp16 = mul(x = normed_239_cast_fp16, y = var_7758_to_fp16)[name = string("hidden_states_163_cast_fp16")]; + tensor hidden_states_165_cast_fp16 = add(x = hidden_states_159_cast_fp16, y = hidden_states_163_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; + int32 var_7809 = const()[name = string("op_7809"), val = int32(-1)]; + fp16 const_380_promoted_to_fp16 = const()[name = string("const_380_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7811_cast_fp16 = mul(x = hidden_states_165_cast_fp16, y = const_380_promoted_to_fp16)[name = string("op_7811_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_7809, interleave = input_201_interleave_0, values = (hidden_states_165_cast_fp16, var_7811_cast_fp16))[name = string("input_201_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_7806_to_fp16 = const()[name = string("op_7806_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_7806_to_fp16, x = input_201_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; + tensor var_7825_to_fp16 = const()[name = string("op_7825_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581393344)))]; + tensor hidden_states_167_cast_fp16 = mul(x = normed_243_cast_fp16, y = var_7825_to_fp16)[name = string("hidden_states_167_cast_fp16")]; + tensor var_7830 = const()[name = string("op_7830"), val = tensor([0, 2, 1])]; + tensor var_7833_axes_0 = const()[name = string("op_7833_axes_0"), val = tensor([2])]; + tensor var_7831_cast_fp16 = transpose(perm = var_7830, x = hidden_states_167_cast_fp16)[name = string("transpose_112")]; + tensor var_7833_cast_fp16 = expand_dims(axes = var_7833_axes_0, x = var_7831_cast_fp16)[name = string("op_7833_cast_fp16")]; + string var_7849_pad_type_0 = const()[name = string("op_7849_pad_type_0"), val = string("valid")]; + tensor var_7849_strides_0 = const()[name = string("op_7849_strides_0"), val = tensor([1, 1])]; + tensor var_7849_pad_0 = const()[name = string("op_7849_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7849_dilations_0 = const()[name = string("op_7849_dilations_0"), val = tensor([1, 1])]; + int32 var_7849_groups_0 = const()[name = string("op_7849_groups_0"), val = int32(1)]; + tensor var_7849 = conv(dilations = var_7849_dilations_0, groups = var_7849_groups_0, pad = var_7849_pad_0, pad_type = var_7849_pad_type_0, strides = var_7849_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_7833_cast_fp16)[name = string("op_7849")]; + tensor var_7854 = const()[name = string("op_7854"), val = tensor([1, 4, 1, 256])]; + tensor var_7855 = reshape(shape = var_7854, x = var_7849)[name = string("op_7855")]; + string var_7871_pad_type_0 = const()[name = string("op_7871_pad_type_0"), val = string("valid")]; + tensor var_7871_strides_0 = const()[name = string("op_7871_strides_0"), val = tensor([1, 1])]; + tensor var_7871_pad_0 = const()[name = string("op_7871_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7871_dilations_0 = const()[name = string("op_7871_dilations_0"), val = tensor([1, 1])]; + int32 var_7871_groups_0 = const()[name = string("op_7871_groups_0"), val = int32(1)]; + tensor var_7871 = conv(dilations = var_7871_dilations_0, groups = var_7871_groups_0, pad = var_7871_pad_0, pad_type = var_7871_pad_type_0, strides = var_7871_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_7833_cast_fp16)[name = string("op_7871")]; + tensor var_7876 = const()[name = string("op_7876"), val = tensor([1, 1, 1, 256])]; + tensor var_7877 = reshape(shape = var_7876, x = var_7871)[name = string("op_7877")]; + string var_7893_pad_type_0 = const()[name = string("op_7893_pad_type_0"), val = string("valid")]; + tensor var_7893_strides_0 = const()[name = string("op_7893_strides_0"), val = tensor([1, 1])]; + tensor var_7893_pad_0 = const()[name = string("op_7893_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7893_dilations_0 = const()[name = string("op_7893_dilations_0"), val = tensor([1, 1])]; + int32 var_7893_groups_0 = const()[name = string("op_7893_groups_0"), val = int32(1)]; + tensor var_7893 = conv(dilations = var_7893_dilations_0, groups = var_7893_groups_0, pad = var_7893_pad_0, pad_type = var_7893_pad_type_0, strides = var_7893_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_7833_cast_fp16)[name = string("op_7893")]; + tensor var_7898 = const()[name = string("op_7898"), val = tensor([1, 1, 1, 256])]; + tensor var_7899 = reshape(shape = var_7898, x = var_7893)[name = string("op_7899")]; + int32 var_7914 = const()[name = string("op_7914"), val = int32(-1)]; + fp16 const_384_promoted = const()[name = string("const_384_promoted"), val = fp16(-0x1p+0)]; + tensor var_7916 = mul(x = var_7855, y = const_384_promoted)[name = string("op_7916")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205 = concat(axis = var_7914, interleave = input_205_interleave_0, values = (var_7855, var_7916))[name = string("input_205")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_7911_to_fp16 = const()[name = string("op_7911_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_7911_to_fp16, x = input_205)[name = string("normed_245_cast_fp16")]; + tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; + tensor var_7930_to_fp16 = const()[name = string("op_7930_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581395712)))]; + tensor q_21_cast_fp16 = mul(x = normed_247, y = var_7930_to_fp16)[name = string("q_21_cast_fp16")]; + int32 var_7941 = const()[name = string("op_7941"), val = int32(-1)]; + fp16 const_388_promoted = const()[name = string("const_388_promoted"), val = fp16(-0x1p+0)]; + tensor var_7943 = mul(x = var_7877, y = const_388_promoted)[name = string("op_7943")]; + bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; + tensor input_207 = concat(axis = var_7941, interleave = input_207_interleave_0, values = (var_7877, var_7943))[name = string("input_207")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_7938_to_fp16 = const()[name = string("op_7938_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_7938_to_fp16, x = input_207)[name = string("normed_249_cast_fp16")]; + tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; + tensor var_7957_to_fp16 = const()[name = string("op_7957_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581396288)))]; + tensor k_21_cast_fp16 = mul(x = normed_251, y = var_7957_to_fp16)[name = string("k_21_cast_fp16")]; + tensor var_7959_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7959_cast_fp16")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; + fp16 const_394_promoted_to_fp16 = const()[name = string("const_394_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7980_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_394_promoted_to_fp16)[name = string("op_7980_cast_fp16")]; + int32 var_7982 = const()[name = string("op_7982"), val = int32(-1)]; + bool var_7983_interleave_0 = const()[name = string("op_7983_interleave_0"), val = bool(false)]; + tensor var_7983_cast_fp16 = concat(axis = var_7982, interleave = var_7983_interleave_0, values = (var_7980_cast_fp16, x1_41_cast_fp16))[name = string("op_7983_cast_fp16")]; + tensor var_7984_cast_fp16 = mul(x = var_7983_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7984_cast_fp16")]; + tensor query_states_41_cast_fp16 = add(x = var_7959_cast_fp16, y = var_7984_cast_fp16)[name = string("query_states_41_cast_fp16")]; + tensor var_7987_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7987_cast_fp16")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; + fp16 const_397_promoted_to_fp16 = const()[name = string("const_397_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8008_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_397_promoted_to_fp16)[name = string("op_8008_cast_fp16")]; + int32 var_8010 = const()[name = string("op_8010"), val = int32(-1)]; + bool var_8011_interleave_0 = const()[name = string("op_8011_interleave_0"), val = bool(false)]; + tensor var_8011_cast_fp16 = concat(axis = var_8010, interleave = var_8011_interleave_0, values = (var_8008_cast_fp16, x1_43_cast_fp16))[name = string("op_8011_cast_fp16")]; + tensor var_8012_cast_fp16 = mul(x = var_8011_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8012_cast_fp16")]; + tensor key_states_41_cast_fp16 = add(x = var_7987_cast_fp16, y = var_8012_cast_fp16)[name = string("key_states_41_cast_fp16")]; + tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([9])]; + tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; + tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; + tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([10])]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_82")]; + tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; + tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; + int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; + bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; + tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_124, concat_83_values1_0, var_1955, concat_83_values3_0))[name = string("concat_83")]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_19_stride_0, update = key_states_41_cast_fp16, x = coreml_update_state_71)[name = string("model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_20")]; + tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([31])]; + tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; + tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; + tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([32])]; + int32 concat_86_axis_0 = const()[name = string("concat_86_axis_0"), val = int32(0)]; + bool concat_86_interleave_0 = const()[name = string("concat_86_interleave_0"), val = bool(false)]; + tensor concat_86 = concat(axis = concat_86_axis_0, interleave = concat_86_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_86")]; + tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; + tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; + int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; + bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; + tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (expand_dims_130, concat_87_values1_0, var_1955, concat_87_values3_0))[name = string("concat_87")]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_20_stride_0, update = var_7899, x = coreml_update_state_72)[name = string("model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_21")]; + tensor var_8067_begin_0 = const()[name = string("op_8067_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_8067_end_0 = const()[name = string("op_8067_end_0"), val = tensor([10, 1, 512, 256])]; + tensor var_8067_end_mask_0 = const()[name = string("op_8067_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8067_cast_fp16 = slice_by_index(begin = var_8067_begin_0, end = var_8067_end_0, end_mask = var_8067_end_mask_0, x = coreml_update_state_73)[name = string("op_8067_cast_fp16")]; + tensor var_8074_begin_0 = const()[name = string("op_8074_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_8074_end_0 = const()[name = string("op_8074_end_0"), val = tensor([32, 1, 512, 256])]; + tensor var_8074_end_mask_0 = const()[name = string("op_8074_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8074_cast_fp16 = slice_by_index(begin = var_8074_begin_0, end = var_8074_end_0, end_mask = var_8074_end_mask_0, x = coreml_update_state_73)[name = string("op_8074_cast_fp16")]; + tensor var_8111 = const()[name = string("op_8111"), val = tensor([1, 4, 1, 1])]; + tensor x_165_cast_fp16 = tile(reps = var_8111, x = var_8067_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor var_8131 = const()[name = string("op_8131"), val = tensor([1, 4, 1, 1])]; + tensor x_171_cast_fp16 = tile(reps = var_8131, x = var_8074_cast_fp16)[name = string("x_171_cast_fp16")]; + bool var_8158_transpose_x_1 = const()[name = string("op_8158_transpose_x_1"), val = bool(false)]; + bool var_8158_transpose_y_1 = const()[name = string("op_8158_transpose_y_1"), val = bool(true)]; + tensor var_8158 = matmul(transpose_x = var_8158_transpose_x_1, transpose_y = var_8158_transpose_y_1, x = query_states_41_cast_fp16, y = x_165_cast_fp16)[name = string("op_8158")]; + fp16 var_8159_to_fp16 = const()[name = string("op_8159_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_61_cast_fp16 = mul(x = var_8158, y = var_8159_to_fp16)[name = string("attn_weights_61_cast_fp16")]; + tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = var_2129)[name = string("attn_weights_63_cast_fp16")]; + int32 var_8194 = const()[name = string("op_8194"), val = int32(-1)]; + tensor attn_weights_65_cast_fp16 = softmax(axis = var_8194, x = attn_weights_63_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; + bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; + bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; + tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = attn_weights_65_cast_fp16, y = x_171_cast_fp16)[name = string("attn_output_101_cast_fp16")]; + tensor var_8205_perm_0 = const()[name = string("op_8205_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8209 = const()[name = string("op_8209"), val = tensor([1, 1, 1024])]; + tensor var_8205_cast_fp16 = transpose(perm = var_8205_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_111")]; + tensor attn_output_105_cast_fp16 = reshape(shape = var_8209, x = var_8205_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_8214 = const()[name = string("op_8214"), val = tensor([0, 2, 1])]; + string var_8230_pad_type_0 = const()[name = string("op_8230_pad_type_0"), val = string("valid")]; + int32 var_8230_groups_0 = const()[name = string("op_8230_groups_0"), val = int32(1)]; + tensor var_8230_strides_0 = const()[name = string("op_8230_strides_0"), val = tensor([1])]; + tensor var_8230_pad_0 = const()[name = string("op_8230_pad_0"), val = tensor([0, 0])]; + tensor var_8230_dilations_0 = const()[name = string("op_8230_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581396864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582281664))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8215_cast_fp16 = transpose(perm = var_8214, x = attn_output_105_cast_fp16)[name = string("transpose_110")]; + tensor var_8230_cast_fp16 = conv(dilations = var_8230_dilations_0, groups = var_8230_groups_0, pad = var_8230_pad_0, pad_type = var_8230_pad_type_0, strides = var_8230_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_8215_cast_fp16)[name = string("op_8230_cast_fp16")]; + tensor var_8234 = const()[name = string("op_8234"), val = tensor([0, 2, 1])]; + int32 var_8245 = const()[name = string("op_8245"), val = int32(-1)]; + fp16 const_406_promoted_to_fp16 = const()[name = string("const_406_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_173_cast_fp16 = transpose(perm = var_8234, x = var_8230_cast_fp16)[name = string("transpose_109")]; + tensor var_8247_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = const_406_promoted_to_fp16)[name = string("op_8247_cast_fp16")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211_cast_fp16 = concat(axis = var_8245, interleave = input_211_interleave_0, values = (hidden_states_173_cast_fp16, var_8247_cast_fp16))[name = string("input_211_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_8242_to_fp16 = const()[name = string("op_8242_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_8242_to_fp16, x = input_211_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; + tensor var_8261_to_fp16 = const()[name = string("op_8261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582318592)))]; + tensor attn_output_109_cast_fp16 = mul(x = normed_255_cast_fp16, y = var_8261_to_fp16)[name = string("attn_output_109_cast_fp16")]; + tensor hidden_states_175_cast_fp16 = add(x = hidden_states_165_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_175_cast_fp16")]; + int32 var_8274 = const()[name = string("op_8274"), val = int32(-1)]; + fp16 const_410_promoted_to_fp16 = const()[name = string("const_410_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8276_cast_fp16 = mul(x = hidden_states_175_cast_fp16, y = const_410_promoted_to_fp16)[name = string("op_8276_cast_fp16")]; + bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; + tensor input_213_cast_fp16 = concat(axis = var_8274, interleave = input_213_interleave_0, values = (hidden_states_175_cast_fp16, var_8276_cast_fp16))[name = string("input_213_cast_fp16")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_8271_to_fp16 = const()[name = string("op_8271_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_8271_to_fp16, x = input_213_cast_fp16)[name = string("normed_257_cast_fp16")]; + tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; + tensor var_8290_to_fp16 = const()[name = string("op_8290_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582320960)))]; + tensor x_173_cast_fp16 = mul(x = normed_259_cast_fp16, y = var_8290_to_fp16)[name = string("x_173_cast_fp16")]; + tensor var_8302 = const()[name = string("op_8302"), val = tensor([0, 2, 1])]; + tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; + tensor var_8303_cast_fp16 = transpose(perm = var_8302, x = x_173_cast_fp16)[name = string("transpose_108")]; + tensor input_215_cast_fp16 = expand_dims(axes = input_215_axes_0, x = var_8303_cast_fp16)[name = string("input_215_cast_fp16")]; + string x_175_pad_type_0 = const()[name = string("x_175_pad_type_0"), val = string("valid")]; + tensor x_175_strides_0 = const()[name = string("x_175_strides_0"), val = tensor([1, 1])]; + tensor x_175_pad_0 = const()[name = string("x_175_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_175_dilations_0 = const()[name = string("x_175_dilations_0"), val = tensor([1, 1])]; + int32 x_175_groups_0 = const()[name = string("x_175_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582323328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588295360))))[name = string("model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("x_175_cast_fp16")]; + string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; + tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; + tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; + int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588516608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594488640))))[name = string("model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_21_cast_fp16 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("b_21_cast_fp16")]; + string var_8328_mode_0 = const()[name = string("op_8328_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_8328_cast_fp16 = gelu(mode = var_8328_mode_0, x = x_175_cast_fp16)[name = string("op_8328_cast_fp16")]; + tensor input_217_cast_fp16 = mul(x = var_8328_cast_fp16, y = b_21_cast_fp16)[name = string("input_217_cast_fp16")]; + string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; + tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; + tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; + int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594709888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600681920))))[name = string("model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_21_cast_fp16 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_217_cast_fp16)[name = string("e_21_cast_fp16")]; + tensor var_8336_axes_0 = const()[name = string("op_8336_axes_0"), val = tensor([2])]; + tensor var_8336_cast_fp16 = squeeze(axes = var_8336_axes_0, x = e_21_cast_fp16)[name = string("op_8336_cast_fp16")]; + tensor var_8337 = const()[name = string("op_8337"), val = tensor([0, 2, 1])]; + int32 var_8348 = const()[name = string("op_8348"), val = int32(-1)]; + fp16 const_414_promoted_to_fp16 = const()[name = string("const_414_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_177_cast_fp16 = transpose(perm = var_8337, x = var_8336_cast_fp16)[name = string("transpose_107")]; + tensor var_8350_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = const_414_promoted_to_fp16)[name = string("op_8350_cast_fp16")]; + bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; + tensor input_219_cast_fp16 = concat(axis = var_8348, interleave = input_219_interleave_0, values = (hidden_states_177_cast_fp16, var_8350_cast_fp16))[name = string("input_219_cast_fp16")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_8345_to_fp16 = const()[name = string("op_8345_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_8345_to_fp16, x = input_219_cast_fp16)[name = string("normed_261_cast_fp16")]; + tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_263_cast_fp16 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263_cast_fp16")]; + tensor var_8364_to_fp16 = const()[name = string("op_8364_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600718848)))]; + tensor hidden_states_179_cast_fp16 = mul(x = normed_263_cast_fp16, y = var_8364_to_fp16)[name = string("hidden_states_179_cast_fp16")]; + tensor hidden_states_181_cast_fp16 = add(x = hidden_states_175_cast_fp16, y = hidden_states_179_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; + int32 var_8415 = const()[name = string("op_8415"), val = int32(-1)]; + fp16 const_418_promoted_to_fp16 = const()[name = string("const_418_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8417_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = const_418_promoted_to_fp16)[name = string("op_8417_cast_fp16")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221_cast_fp16 = concat(axis = var_8415, interleave = input_221_interleave_0, values = (hidden_states_181_cast_fp16, var_8417_cast_fp16))[name = string("input_221_cast_fp16")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_8412_to_fp16 = const()[name = string("op_8412_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_8412_to_fp16, x = input_221_cast_fp16)[name = string("normed_265_cast_fp16")]; + tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_267_cast_fp16 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267_cast_fp16")]; + tensor var_8431_to_fp16 = const()[name = string("op_8431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600721216)))]; + tensor hidden_states_183_cast_fp16 = mul(x = normed_267_cast_fp16, y = var_8431_to_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor var_8436 = const()[name = string("op_8436"), val = tensor([0, 2, 1])]; + tensor var_8439_axes_0 = const()[name = string("op_8439_axes_0"), val = tensor([2])]; + tensor var_8437_cast_fp16 = transpose(perm = var_8436, x = hidden_states_183_cast_fp16)[name = string("transpose_106")]; + tensor var_8439_cast_fp16 = expand_dims(axes = var_8439_axes_0, x = var_8437_cast_fp16)[name = string("op_8439_cast_fp16")]; + string var_8455_pad_type_0 = const()[name = string("op_8455_pad_type_0"), val = string("valid")]; + tensor var_8455_strides_0 = const()[name = string("op_8455_strides_0"), val = tensor([1, 1])]; + tensor var_8455_pad_0 = const()[name = string("op_8455_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8455_dilations_0 = const()[name = string("op_8455_dilations_0"), val = tensor([1, 1])]; + int32 var_8455_groups_0 = const()[name = string("op_8455_groups_0"), val = int32(1)]; + tensor var_8455 = conv(dilations = var_8455_dilations_0, groups = var_8455_groups_0, pad = var_8455_pad_0, pad_type = var_8455_pad_type_0, strides = var_8455_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_8439_cast_fp16)[name = string("op_8455")]; + tensor var_8460 = const()[name = string("op_8460"), val = tensor([1, 4, 1, 256])]; + tensor var_8461 = reshape(shape = var_8460, x = var_8455)[name = string("op_8461")]; + string var_8477_pad_type_0 = const()[name = string("op_8477_pad_type_0"), val = string("valid")]; + tensor var_8477_strides_0 = const()[name = string("op_8477_strides_0"), val = tensor([1, 1])]; + tensor var_8477_pad_0 = const()[name = string("op_8477_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8477_dilations_0 = const()[name = string("op_8477_dilations_0"), val = tensor([1, 1])]; + int32 var_8477_groups_0 = const()[name = string("op_8477_groups_0"), val = int32(1)]; + tensor var_8477 = conv(dilations = var_8477_dilations_0, groups = var_8477_groups_0, pad = var_8477_pad_0, pad_type = var_8477_pad_type_0, strides = var_8477_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_8439_cast_fp16)[name = string("op_8477")]; + tensor var_8482 = const()[name = string("op_8482"), val = tensor([1, 1, 1, 256])]; + tensor var_8483 = reshape(shape = var_8482, x = var_8477)[name = string("op_8483")]; + string var_8499_pad_type_0 = const()[name = string("op_8499_pad_type_0"), val = string("valid")]; + tensor var_8499_strides_0 = const()[name = string("op_8499_strides_0"), val = tensor([1, 1])]; + tensor var_8499_pad_0 = const()[name = string("op_8499_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8499_dilations_0 = const()[name = string("op_8499_dilations_0"), val = tensor([1, 1])]; + int32 var_8499_groups_0 = const()[name = string("op_8499_groups_0"), val = int32(1)]; + tensor var_8499 = conv(dilations = var_8499_dilations_0, groups = var_8499_groups_0, pad = var_8499_pad_0, pad_type = var_8499_pad_type_0, strides = var_8499_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_8439_cast_fp16)[name = string("op_8499")]; + tensor var_8504 = const()[name = string("op_8504"), val = tensor([1, 1, 1, 256])]; + tensor var_8505 = reshape(shape = var_8504, x = var_8499)[name = string("op_8505")]; + int32 var_8520 = const()[name = string("op_8520"), val = int32(-1)]; + fp16 const_422_promoted = const()[name = string("const_422_promoted"), val = fp16(-0x1p+0)]; + tensor var_8522 = mul(x = var_8461, y = const_422_promoted)[name = string("op_8522")]; + bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; + tensor input_225 = concat(axis = var_8520, interleave = input_225_interleave_0, values = (var_8461, var_8522))[name = string("input_225")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_8517_to_fp16 = const()[name = string("op_8517_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_8517_to_fp16, x = input_225)[name = string("normed_269_cast_fp16")]; + tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_271 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271")]; + tensor var_8536_to_fp16 = const()[name = string("op_8536_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600723584)))]; + tensor q_23_cast_fp16 = mul(x = normed_271, y = var_8536_to_fp16)[name = string("q_23_cast_fp16")]; + int32 var_8547 = const()[name = string("op_8547"), val = int32(-1)]; + fp16 const_426_promoted = const()[name = string("const_426_promoted"), val = fp16(-0x1p+0)]; + tensor var_8549 = mul(x = var_8483, y = const_426_promoted)[name = string("op_8549")]; + bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; + tensor input_227 = concat(axis = var_8547, interleave = input_227_interleave_0, values = (var_8483, var_8549))[name = string("input_227")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_8544_to_fp16 = const()[name = string("op_8544_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_8544_to_fp16, x = input_227)[name = string("normed_273_cast_fp16")]; + tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_275 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275")]; + tensor var_8563_to_fp16 = const()[name = string("op_8563_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600724160)))]; + tensor k_23_cast_fp16 = mul(x = normed_275, y = var_8563_to_fp16)[name = string("k_23_cast_fp16")]; + tensor var_8565_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_21_cast_fp16)[name = string("op_8565_cast_fp16")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; + fp16 const_432_promoted_to_fp16 = const()[name = string("const_432_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8586_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_432_promoted_to_fp16)[name = string("op_8586_cast_fp16")]; + int32 var_8588 = const()[name = string("op_8588"), val = int32(-1)]; + bool var_8589_interleave_0 = const()[name = string("op_8589_interleave_0"), val = bool(false)]; + tensor var_8589_cast_fp16 = concat(axis = var_8588, interleave = var_8589_interleave_0, values = (var_8586_cast_fp16, x1_45_cast_fp16))[name = string("op_8589_cast_fp16")]; + tensor var_8590_cast_fp16 = mul(x = var_8589_cast_fp16, y = sin_21_cast_fp16)[name = string("op_8590_cast_fp16")]; + tensor query_states_45_cast_fp16 = add(x = var_8565_cast_fp16, y = var_8590_cast_fp16)[name = string("query_states_45_cast_fp16")]; + tensor var_8593_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_21_cast_fp16)[name = string("op_8593_cast_fp16")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; + fp16 const_435_promoted_to_fp16 = const()[name = string("const_435_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8614_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_435_promoted_to_fp16)[name = string("op_8614_cast_fp16")]; + int32 var_8616 = const()[name = string("op_8616"), val = int32(-1)]; + bool var_8617_interleave_0 = const()[name = string("op_8617_interleave_0"), val = bool(false)]; + tensor var_8617_cast_fp16 = concat(axis = var_8616, interleave = var_8617_interleave_0, values = (var_8614_cast_fp16, x1_47_cast_fp16))[name = string("op_8617_cast_fp16")]; + tensor var_8618_cast_fp16 = mul(x = var_8617_cast_fp16, y = sin_21_cast_fp16)[name = string("op_8618_cast_fp16")]; + tensor key_states_45_cast_fp16 = add(x = var_8593_cast_fp16, y = var_8618_cast_fp16)[name = string("key_states_45_cast_fp16")]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_3_stride_0, update = key_states_45_cast_fp16, x = coreml_update_state_63)[name = string("model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_22")]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_4_stride_0, update = var_8505, x = coreml_update_state_74)[name = string("model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_23")]; + tensor var_8673_begin_0 = const()[name = string("op_8673_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_8673_end_0 = const()[name = string("op_8673_end_0"), val = tensor([2, 1, 4096, 256])]; + tensor var_8673_end_mask_0 = const()[name = string("op_8673_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8673_cast_fp16 = slice_by_index(begin = var_8673_begin_0, end = var_8673_end_0, end_mask = var_8673_end_mask_0, x = coreml_update_state_75)[name = string("op_8673_cast_fp16")]; + tensor var_8680_begin_0 = const()[name = string("op_8680_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_8680_end_0 = const()[name = string("op_8680_end_0"), val = tensor([6, 1, 4096, 256])]; + tensor var_8680_end_mask_0 = const()[name = string("op_8680_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8680_cast_fp16 = slice_by_index(begin = var_8680_begin_0, end = var_8680_end_0, end_mask = var_8680_end_mask_0, x = coreml_update_state_75)[name = string("op_8680_cast_fp16")]; + tensor var_8717 = const()[name = string("op_8717"), val = tensor([1, 4, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_8717, x = var_8673_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_8737 = const()[name = string("op_8737"), val = tensor([1, 4, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_8737, x = var_8680_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_8764_transpose_x_1 = const()[name = string("op_8764_transpose_x_1"), val = bool(false)]; + bool var_8764_transpose_y_1 = const()[name = string("op_8764_transpose_y_1"), val = bool(true)]; + tensor var_8764 = matmul(transpose_x = var_8764_transpose_x_1, transpose_y = var_8764_transpose_y_1, x = query_states_45_cast_fp16, y = x_181_cast_fp16)[name = string("op_8764")]; + fp16 var_8765_to_fp16 = const()[name = string("op_8765_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_67_cast_fp16 = mul(x = var_8764, y = var_8765_to_fp16)[name = string("attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; + int32 var_8800 = const()[name = string("op_8800"), val = int32(-1)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_8800, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool attn_output_111_transpose_x_0 = const()[name = string("attn_output_111_transpose_x_0"), val = bool(false)]; + bool attn_output_111_transpose_y_0 = const()[name = string("attn_output_111_transpose_y_0"), val = bool(false)]; + tensor attn_output_111_cast_fp16 = matmul(transpose_x = attn_output_111_transpose_x_0, transpose_y = attn_output_111_transpose_y_0, x = attn_weights_71_cast_fp16, y = x_187_cast_fp16)[name = string("attn_output_111_cast_fp16")]; + tensor var_8811_perm_0 = const()[name = string("op_8811_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8815 = const()[name = string("op_8815"), val = tensor([1, 1, 1024])]; + tensor var_8811_cast_fp16 = transpose(perm = var_8811_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_105")]; + tensor attn_output_115_cast_fp16 = reshape(shape = var_8815, x = var_8811_cast_fp16)[name = string("attn_output_115_cast_fp16")]; + tensor var_8820 = const()[name = string("op_8820"), val = tensor([0, 2, 1])]; + string var_8836_pad_type_0 = const()[name = string("op_8836_pad_type_0"), val = string("valid")]; + int32 var_8836_groups_0 = const()[name = string("op_8836_groups_0"), val = int32(1)]; + tensor var_8836_strides_0 = const()[name = string("op_8836_strides_0"), val = tensor([1])]; + tensor var_8836_pad_0 = const()[name = string("op_8836_pad_0"), val = tensor([0, 0])]; + tensor var_8836_dilations_0 = const()[name = string("op_8836_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600724736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601609536))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8821_cast_fp16 = transpose(perm = var_8820, x = attn_output_115_cast_fp16)[name = string("transpose_104")]; + tensor var_8836_cast_fp16 = conv(dilations = var_8836_dilations_0, groups = var_8836_groups_0, pad = var_8836_pad_0, pad_type = var_8836_pad_type_0, strides = var_8836_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_8821_cast_fp16)[name = string("op_8836_cast_fp16")]; + tensor var_8840 = const()[name = string("op_8840"), val = tensor([0, 2, 1])]; + int32 var_8851 = const()[name = string("op_8851"), val = int32(-1)]; + fp16 const_444_promoted_to_fp16 = const()[name = string("const_444_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_189_cast_fp16 = transpose(perm = var_8840, x = var_8836_cast_fp16)[name = string("transpose_103")]; + tensor var_8853_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_444_promoted_to_fp16)[name = string("op_8853_cast_fp16")]; + bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; + tensor input_231_cast_fp16 = concat(axis = var_8851, interleave = input_231_interleave_0, values = (hidden_states_189_cast_fp16, var_8853_cast_fp16))[name = string("input_231_cast_fp16")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_8848_to_fp16 = const()[name = string("op_8848_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_8848_to_fp16, x = input_231_cast_fp16)[name = string("normed_277_cast_fp16")]; + tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_279_cast_fp16 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279_cast_fp16")]; + tensor var_8867_to_fp16 = const()[name = string("op_8867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601646464)))]; + tensor attn_output_119_cast_fp16 = mul(x = normed_279_cast_fp16, y = var_8867_to_fp16)[name = string("attn_output_119_cast_fp16")]; + tensor hidden_states_191_cast_fp16 = add(x = hidden_states_181_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + int32 var_8880 = const()[name = string("op_8880"), val = int32(-1)]; + fp16 const_448_promoted_to_fp16 = const()[name = string("const_448_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8882_cast_fp16 = mul(x = hidden_states_191_cast_fp16, y = const_448_promoted_to_fp16)[name = string("op_8882_cast_fp16")]; + bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; + tensor input_233_cast_fp16 = concat(axis = var_8880, interleave = input_233_interleave_0, values = (hidden_states_191_cast_fp16, var_8882_cast_fp16))[name = string("input_233_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_8877_to_fp16 = const()[name = string("op_8877_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_8877_to_fp16, x = input_233_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_283_cast_fp16 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283_cast_fp16")]; + tensor var_8896_to_fp16 = const()[name = string("op_8896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601648832)))]; + tensor x_189_cast_fp16 = mul(x = normed_283_cast_fp16, y = var_8896_to_fp16)[name = string("x_189_cast_fp16")]; + tensor var_8908 = const()[name = string("op_8908"), val = tensor([0, 2, 1])]; + tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; + tensor var_8909_cast_fp16 = transpose(perm = var_8908, x = x_189_cast_fp16)[name = string("transpose_102")]; + tensor input_235_cast_fp16 = expand_dims(axes = input_235_axes_0, x = var_8909_cast_fp16)[name = string("input_235_cast_fp16")]; + string x_191_pad_type_0 = const()[name = string("x_191_pad_type_0"), val = string("valid")]; + tensor x_191_strides_0 = const()[name = string("x_191_strides_0"), val = tensor([1, 1])]; + tensor x_191_pad_0 = const()[name = string("x_191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_191_dilations_0 = const()[name = string("x_191_dilations_0"), val = tensor([1, 1])]; + int32 x_191_groups_0 = const()[name = string("x_191_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601651200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607623232))))[name = string("model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("x_191_cast_fp16")]; + string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; + tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; + tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; + int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607844480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613816512))))[name = string("model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_23_cast_fp16 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("b_23_cast_fp16")]; + string var_8934_mode_0 = const()[name = string("op_8934_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_8934_cast_fp16 = gelu(mode = var_8934_mode_0, x = x_191_cast_fp16)[name = string("op_8934_cast_fp16")]; + tensor input_237_cast_fp16 = mul(x = var_8934_cast_fp16, y = b_23_cast_fp16)[name = string("input_237_cast_fp16")]; + string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; + tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; + tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; + int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614037760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620009792))))[name = string("model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_23_cast_fp16 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("e_23_cast_fp16")]; + tensor var_8942_axes_0 = const()[name = string("op_8942_axes_0"), val = tensor([2])]; + tensor var_8942_cast_fp16 = squeeze(axes = var_8942_axes_0, x = e_23_cast_fp16)[name = string("op_8942_cast_fp16")]; + tensor var_8943 = const()[name = string("op_8943"), val = tensor([0, 2, 1])]; + int32 var_8954 = const()[name = string("op_8954"), val = int32(-1)]; + fp16 const_452_promoted_to_fp16 = const()[name = string("const_452_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_193_cast_fp16 = transpose(perm = var_8943, x = var_8942_cast_fp16)[name = string("transpose_101")]; + tensor var_8956_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = const_452_promoted_to_fp16)[name = string("op_8956_cast_fp16")]; + bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; + tensor input_239_cast_fp16 = concat(axis = var_8954, interleave = input_239_interleave_0, values = (hidden_states_193_cast_fp16, var_8956_cast_fp16))[name = string("input_239_cast_fp16")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_8951_to_fp16 = const()[name = string("op_8951_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_8951_to_fp16, x = input_239_cast_fp16)[name = string("normed_285_cast_fp16")]; + tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; + tensor var_8970_to_fp16 = const()[name = string("op_8970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620046720)))]; + tensor hidden_states_195_cast_fp16 = mul(x = normed_287_cast_fp16, y = var_8970_to_fp16)[name = string("hidden_states_195_cast_fp16")]; + tensor hidden_states_197_cast_fp16 = add(x = hidden_states_191_cast_fp16, y = hidden_states_195_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; + int32 var_9021 = const()[name = string("op_9021"), val = int32(-1)]; + fp16 const_456_promoted_to_fp16 = const()[name = string("const_456_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9023_cast_fp16 = mul(x = hidden_states_197_cast_fp16, y = const_456_promoted_to_fp16)[name = string("op_9023_cast_fp16")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241_cast_fp16 = concat(axis = var_9021, interleave = input_241_interleave_0, values = (hidden_states_197_cast_fp16, var_9023_cast_fp16))[name = string("input_241_cast_fp16")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_9018_to_fp16 = const()[name = string("op_9018_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_9018_to_fp16, x = input_241_cast_fp16)[name = string("normed_289_cast_fp16")]; + tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; + tensor var_9037_to_fp16 = const()[name = string("op_9037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620049088)))]; + tensor hidden_states_199_cast_fp16 = mul(x = normed_291_cast_fp16, y = var_9037_to_fp16)[name = string("hidden_states_199_cast_fp16")]; + tensor var_9042 = const()[name = string("op_9042"), val = tensor([0, 2, 1])]; + tensor var_9045_axes_0 = const()[name = string("op_9045_axes_0"), val = tensor([2])]; + tensor var_9043_cast_fp16 = transpose(perm = var_9042, x = hidden_states_199_cast_fp16)[name = string("transpose_100")]; + tensor var_9045_cast_fp16 = expand_dims(axes = var_9045_axes_0, x = var_9043_cast_fp16)[name = string("op_9045_cast_fp16")]; + string var_9061_pad_type_0 = const()[name = string("op_9061_pad_type_0"), val = string("valid")]; + tensor var_9061_strides_0 = const()[name = string("op_9061_strides_0"), val = tensor([1, 1])]; + tensor var_9061_pad_0 = const()[name = string("op_9061_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9061_dilations_0 = const()[name = string("op_9061_dilations_0"), val = tensor([1, 1])]; + int32 var_9061_groups_0 = const()[name = string("op_9061_groups_0"), val = int32(1)]; + tensor var_9061 = conv(dilations = var_9061_dilations_0, groups = var_9061_groups_0, pad = var_9061_pad_0, pad_type = var_9061_pad_type_0, strides = var_9061_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_9045_cast_fp16)[name = string("op_9061")]; + tensor var_9066 = const()[name = string("op_9066"), val = tensor([1, 4, 1, 256])]; + tensor var_9067 = reshape(shape = var_9066, x = var_9061)[name = string("op_9067")]; + string var_9083_pad_type_0 = const()[name = string("op_9083_pad_type_0"), val = string("valid")]; + tensor var_9083_strides_0 = const()[name = string("op_9083_strides_0"), val = tensor([1, 1])]; + tensor var_9083_pad_0 = const()[name = string("op_9083_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9083_dilations_0 = const()[name = string("op_9083_dilations_0"), val = tensor([1, 1])]; + int32 var_9083_groups_0 = const()[name = string("op_9083_groups_0"), val = int32(1)]; + tensor var_9083 = conv(dilations = var_9083_dilations_0, groups = var_9083_groups_0, pad = var_9083_pad_0, pad_type = var_9083_pad_type_0, strides = var_9083_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_9045_cast_fp16)[name = string("op_9083")]; + tensor var_9088 = const()[name = string("op_9088"), val = tensor([1, 1, 1, 256])]; + tensor var_9089 = reshape(shape = var_9088, x = var_9083)[name = string("op_9089")]; + string var_9105_pad_type_0 = const()[name = string("op_9105_pad_type_0"), val = string("valid")]; + tensor var_9105_strides_0 = const()[name = string("op_9105_strides_0"), val = tensor([1, 1])]; + tensor var_9105_pad_0 = const()[name = string("op_9105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9105_dilations_0 = const()[name = string("op_9105_dilations_0"), val = tensor([1, 1])]; + int32 var_9105_groups_0 = const()[name = string("op_9105_groups_0"), val = int32(1)]; + tensor var_9105 = conv(dilations = var_9105_dilations_0, groups = var_9105_groups_0, pad = var_9105_pad_0, pad_type = var_9105_pad_type_0, strides = var_9105_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_9045_cast_fp16)[name = string("op_9105")]; + tensor var_9110 = const()[name = string("op_9110"), val = tensor([1, 1, 1, 256])]; + tensor var_9111 = reshape(shape = var_9110, x = var_9105)[name = string("op_9111")]; + int32 var_9126 = const()[name = string("op_9126"), val = int32(-1)]; + fp16 const_460_promoted = const()[name = string("const_460_promoted"), val = fp16(-0x1p+0)]; + tensor var_9128 = mul(x = var_9067, y = const_460_promoted)[name = string("op_9128")]; + bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; + tensor input_245 = concat(axis = var_9126, interleave = input_245_interleave_0, values = (var_9067, var_9128))[name = string("input_245")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_9123_to_fp16 = const()[name = string("op_9123_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_9123_to_fp16, x = input_245)[name = string("normed_293_cast_fp16")]; + tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; + tensor var_9142_to_fp16 = const()[name = string("op_9142_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620051456)))]; + tensor q_25_cast_fp16 = mul(x = normed_295, y = var_9142_to_fp16)[name = string("q_25_cast_fp16")]; + int32 var_9153 = const()[name = string("op_9153"), val = int32(-1)]; + fp16 const_464_promoted = const()[name = string("const_464_promoted"), val = fp16(-0x1p+0)]; + tensor var_9155 = mul(x = var_9089, y = const_464_promoted)[name = string("op_9155")]; + bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; + tensor input_247 = concat(axis = var_9153, interleave = input_247_interleave_0, values = (var_9089, var_9155))[name = string("input_247")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_9150_to_fp16 = const()[name = string("op_9150_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_9150_to_fp16, x = input_247)[name = string("normed_297_cast_fp16")]; + tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; + tensor var_9169_to_fp16 = const()[name = string("op_9169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620052032)))]; + tensor k_25_cast_fp16 = mul(x = normed_299, y = var_9169_to_fp16)[name = string("k_25_cast_fp16")]; + tensor var_9171_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9171_cast_fp16")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; + fp16 const_470_promoted_to_fp16 = const()[name = string("const_470_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9192_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_470_promoted_to_fp16)[name = string("op_9192_cast_fp16")]; + int32 var_9194 = const()[name = string("op_9194"), val = int32(-1)]; + bool var_9195_interleave_0 = const()[name = string("op_9195_interleave_0"), val = bool(false)]; + tensor var_9195_cast_fp16 = concat(axis = var_9194, interleave = var_9195_interleave_0, values = (var_9192_cast_fp16, x1_49_cast_fp16))[name = string("op_9195_cast_fp16")]; + tensor var_9196_cast_fp16 = mul(x = var_9195_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9196_cast_fp16")]; + tensor query_states_49_cast_fp16 = add(x = var_9171_cast_fp16, y = var_9196_cast_fp16)[name = string("query_states_49_cast_fp16")]; + tensor var_9199_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9199_cast_fp16")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; + fp16 const_473_promoted_to_fp16 = const()[name = string("const_473_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9220_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_473_promoted_to_fp16)[name = string("op_9220_cast_fp16")]; + int32 var_9222 = const()[name = string("op_9222"), val = int32(-1)]; + bool var_9223_interleave_0 = const()[name = string("op_9223_interleave_0"), val = bool(false)]; + tensor var_9223_cast_fp16 = concat(axis = var_9222, interleave = var_9223_interleave_0, values = (var_9220_cast_fp16, x1_51_cast_fp16))[name = string("op_9223_cast_fp16")]; + tensor var_9224_cast_fp16 = mul(x = var_9223_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9224_cast_fp16")]; + tensor key_states_49_cast_fp16 = add(x = var_9199_cast_fp16, y = var_9224_cast_fp16)[name = string("key_states_49_cast_fp16")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([10])]; + tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; + tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; + tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([11])]; + int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; + bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; + tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_98")]; + tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; + tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; + int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; + bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; + tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_148, concat_99_values1_0, var_1955, concat_99_values3_0))[name = string("concat_99")]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_21_stride_0, update = key_states_49_cast_fp16, x = coreml_update_state_73)[name = string("model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_24")]; + tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([32])]; + tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; + tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; + tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([33])]; + int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; + bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; + tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_102")]; + tensor concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = tensor([0])]; + tensor concat_103_values3_0 = const()[name = string("concat_103_values3_0"), val = tensor([0])]; + int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; + bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; + tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (expand_dims_154, concat_103_values1_0, var_1955, concat_103_values3_0))[name = string("concat_103")]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_22_stride_0, update = var_9111, x = coreml_update_state_76)[name = string("model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_25")]; + tensor var_9279_begin_0 = const()[name = string("op_9279_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor var_9279_end_0 = const()[name = string("op_9279_end_0"), val = tensor([11, 1, 512, 256])]; + tensor var_9279_end_mask_0 = const()[name = string("op_9279_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9279_cast_fp16 = slice_by_index(begin = var_9279_begin_0, end = var_9279_end_0, end_mask = var_9279_end_mask_0, x = coreml_update_state_77)[name = string("op_9279_cast_fp16")]; + tensor var_9286_begin_0 = const()[name = string("op_9286_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_9286_end_0 = const()[name = string("op_9286_end_0"), val = tensor([33, 1, 512, 256])]; + tensor var_9286_end_mask_0 = const()[name = string("op_9286_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9286_cast_fp16 = slice_by_index(begin = var_9286_begin_0, end = var_9286_end_0, end_mask = var_9286_end_mask_0, x = coreml_update_state_77)[name = string("op_9286_cast_fp16")]; + tensor var_9323 = const()[name = string("op_9323"), val = tensor([1, 4, 1, 1])]; + tensor x_197_cast_fp16 = tile(reps = var_9323, x = var_9279_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_9343 = const()[name = string("op_9343"), val = tensor([1, 4, 1, 1])]; + tensor x_203_cast_fp16 = tile(reps = var_9343, x = var_9286_cast_fp16)[name = string("x_203_cast_fp16")]; + bool var_9370_transpose_x_1 = const()[name = string("op_9370_transpose_x_1"), val = bool(false)]; + bool var_9370_transpose_y_1 = const()[name = string("op_9370_transpose_y_1"), val = bool(true)]; + tensor var_9370 = matmul(transpose_x = var_9370_transpose_x_1, transpose_y = var_9370_transpose_y_1, x = query_states_49_cast_fp16, y = x_197_cast_fp16)[name = string("op_9370")]; + fp16 var_9371_to_fp16 = const()[name = string("op_9371_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_73_cast_fp16 = mul(x = var_9370, y = var_9371_to_fp16)[name = string("attn_weights_73_cast_fp16")]; + tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = var_2129)[name = string("attn_weights_75_cast_fp16")]; + int32 var_9406 = const()[name = string("op_9406"), val = int32(-1)]; + tensor attn_weights_77_cast_fp16 = softmax(axis = var_9406, x = attn_weights_75_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; + bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; + tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = attn_weights_77_cast_fp16, y = x_203_cast_fp16)[name = string("attn_output_121_cast_fp16")]; + tensor var_9417_perm_0 = const()[name = string("op_9417_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9421 = const()[name = string("op_9421"), val = tensor([1, 1, 1024])]; + tensor var_9417_cast_fp16 = transpose(perm = var_9417_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_99")]; + tensor attn_output_125_cast_fp16 = reshape(shape = var_9421, x = var_9417_cast_fp16)[name = string("attn_output_125_cast_fp16")]; + tensor var_9426 = const()[name = string("op_9426"), val = tensor([0, 2, 1])]; + string var_9442_pad_type_0 = const()[name = string("op_9442_pad_type_0"), val = string("valid")]; + int32 var_9442_groups_0 = const()[name = string("op_9442_groups_0"), val = int32(1)]; + tensor var_9442_strides_0 = const()[name = string("op_9442_strides_0"), val = tensor([1])]; + tensor var_9442_pad_0 = const()[name = string("op_9442_pad_0"), val = tensor([0, 0])]; + tensor var_9442_dilations_0 = const()[name = string("op_9442_dilations_0"), val = tensor([1])]; + tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620052608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620937408))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9427_cast_fp16 = transpose(perm = var_9426, x = attn_output_125_cast_fp16)[name = string("transpose_98")]; + tensor var_9442_cast_fp16 = conv(dilations = var_9442_dilations_0, groups = var_9442_groups_0, pad = var_9442_pad_0, pad_type = var_9442_pad_type_0, strides = var_9442_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_9427_cast_fp16)[name = string("op_9442_cast_fp16")]; + tensor var_9446 = const()[name = string("op_9446"), val = tensor([0, 2, 1])]; + int32 var_9457 = const()[name = string("op_9457"), val = int32(-1)]; + fp16 const_482_promoted_to_fp16 = const()[name = string("const_482_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_205_cast_fp16 = transpose(perm = var_9446, x = var_9442_cast_fp16)[name = string("transpose_97")]; + tensor var_9459_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = const_482_promoted_to_fp16)[name = string("op_9459_cast_fp16")]; + bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; + tensor input_251_cast_fp16 = concat(axis = var_9457, interleave = input_251_interleave_0, values = (hidden_states_205_cast_fp16, var_9459_cast_fp16))[name = string("input_251_cast_fp16")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_9454_to_fp16 = const()[name = string("op_9454_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_9454_to_fp16, x = input_251_cast_fp16)[name = string("normed_301_cast_fp16")]; + tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; + tensor var_9473_to_fp16 = const()[name = string("op_9473_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620974336)))]; + tensor attn_output_129_cast_fp16 = mul(x = normed_303_cast_fp16, y = var_9473_to_fp16)[name = string("attn_output_129_cast_fp16")]; + tensor hidden_states_207_cast_fp16 = add(x = hidden_states_197_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_207_cast_fp16")]; + int32 var_9486 = const()[name = string("op_9486"), val = int32(-1)]; + fp16 const_486_promoted_to_fp16 = const()[name = string("const_486_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9488_cast_fp16 = mul(x = hidden_states_207_cast_fp16, y = const_486_promoted_to_fp16)[name = string("op_9488_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_9486, interleave = input_253_interleave_0, values = (hidden_states_207_cast_fp16, var_9488_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_9483_to_fp16 = const()[name = string("op_9483_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_9483_to_fp16, x = input_253_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; + tensor var_9502_to_fp16 = const()[name = string("op_9502_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620976704)))]; + tensor x_205_cast_fp16 = mul(x = normed_307_cast_fp16, y = var_9502_to_fp16)[name = string("x_205_cast_fp16")]; + tensor var_9514 = const()[name = string("op_9514"), val = tensor([0, 2, 1])]; + tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; + tensor var_9515_cast_fp16 = transpose(perm = var_9514, x = x_205_cast_fp16)[name = string("transpose_96")]; + tensor input_255_cast_fp16 = expand_dims(axes = input_255_axes_0, x = var_9515_cast_fp16)[name = string("input_255_cast_fp16")]; + string x_207_pad_type_0 = const()[name = string("x_207_pad_type_0"), val = string("valid")]; + tensor x_207_strides_0 = const()[name = string("x_207_strides_0"), val = tensor([1, 1])]; + tensor x_207_pad_0 = const()[name = string("x_207_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_207_dilations_0 = const()[name = string("x_207_dilations_0"), val = tensor([1, 1])]; + int32 x_207_groups_0 = const()[name = string("x_207_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620979072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(626951104))))[name = string("model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_207_cast_fp16 = conv(dilations = x_207_dilations_0, groups = x_207_groups_0, pad = x_207_pad_0, pad_type = x_207_pad_type_0, strides = x_207_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("x_207_cast_fp16")]; + string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; + tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; + tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; + int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627172352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633144384))))[name = string("model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_25_cast_fp16 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("b_25_cast_fp16")]; + string var_9540_mode_0 = const()[name = string("op_9540_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_9540_cast_fp16 = gelu(mode = var_9540_mode_0, x = x_207_cast_fp16)[name = string("op_9540_cast_fp16")]; + tensor input_257_cast_fp16 = mul(x = var_9540_cast_fp16, y = b_25_cast_fp16)[name = string("input_257_cast_fp16")]; + string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; + tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; + tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; + int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633365632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639337664))))[name = string("model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_25_cast_fp16 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_257_cast_fp16)[name = string("e_25_cast_fp16")]; + tensor var_9548_axes_0 = const()[name = string("op_9548_axes_0"), val = tensor([2])]; + tensor var_9548_cast_fp16 = squeeze(axes = var_9548_axes_0, x = e_25_cast_fp16)[name = string("op_9548_cast_fp16")]; + tensor var_9549 = const()[name = string("op_9549"), val = tensor([0, 2, 1])]; + int32 var_9560 = const()[name = string("op_9560"), val = int32(-1)]; + fp16 const_490_promoted_to_fp16 = const()[name = string("const_490_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_209_cast_fp16 = transpose(perm = var_9549, x = var_9548_cast_fp16)[name = string("transpose_95")]; + tensor var_9562_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_490_promoted_to_fp16)[name = string("op_9562_cast_fp16")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259_cast_fp16 = concat(axis = var_9560, interleave = input_259_interleave_0, values = (hidden_states_209_cast_fp16, var_9562_cast_fp16))[name = string("input_259_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_9557_to_fp16 = const()[name = string("op_9557_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_9557_to_fp16, x = input_259_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_311_cast_fp16 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311_cast_fp16")]; + tensor var_9576_to_fp16 = const()[name = string("op_9576_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639374592)))]; + tensor hidden_states_211_cast_fp16 = mul(x = normed_311_cast_fp16, y = var_9576_to_fp16)[name = string("hidden_states_211_cast_fp16")]; + tensor hidden_states_213_cast_fp16 = add(x = hidden_states_207_cast_fp16, y = hidden_states_211_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; + int32 var_9627 = const()[name = string("op_9627"), val = int32(-1)]; + fp16 const_494_promoted_to_fp16 = const()[name = string("const_494_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9629_cast_fp16 = mul(x = hidden_states_213_cast_fp16, y = const_494_promoted_to_fp16)[name = string("op_9629_cast_fp16")]; + bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; + tensor input_261_cast_fp16 = concat(axis = var_9627, interleave = input_261_interleave_0, values = (hidden_states_213_cast_fp16, var_9629_cast_fp16))[name = string("input_261_cast_fp16")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_9624_to_fp16 = const()[name = string("op_9624_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_9624_to_fp16, x = input_261_cast_fp16)[name = string("normed_313_cast_fp16")]; + tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_315_cast_fp16 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315_cast_fp16")]; + tensor var_9643_to_fp16 = const()[name = string("op_9643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639376960)))]; + tensor hidden_states_215_cast_fp16 = mul(x = normed_315_cast_fp16, y = var_9643_to_fp16)[name = string("hidden_states_215_cast_fp16")]; + tensor var_9648 = const()[name = string("op_9648"), val = tensor([0, 2, 1])]; + tensor var_9651_axes_0 = const()[name = string("op_9651_axes_0"), val = tensor([2])]; + tensor var_9649_cast_fp16 = transpose(perm = var_9648, x = hidden_states_215_cast_fp16)[name = string("transpose_94")]; + tensor var_9651_cast_fp16 = expand_dims(axes = var_9651_axes_0, x = var_9649_cast_fp16)[name = string("op_9651_cast_fp16")]; + string var_9667_pad_type_0 = const()[name = string("op_9667_pad_type_0"), val = string("valid")]; + tensor var_9667_strides_0 = const()[name = string("op_9667_strides_0"), val = tensor([1, 1])]; + tensor var_9667_pad_0 = const()[name = string("op_9667_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9667_dilations_0 = const()[name = string("op_9667_dilations_0"), val = tensor([1, 1])]; + int32 var_9667_groups_0 = const()[name = string("op_9667_groups_0"), val = int32(1)]; + tensor var_9667 = conv(dilations = var_9667_dilations_0, groups = var_9667_groups_0, pad = var_9667_pad_0, pad_type = var_9667_pad_type_0, strides = var_9667_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_9651_cast_fp16)[name = string("op_9667")]; + tensor var_9672 = const()[name = string("op_9672"), val = tensor([1, 4, 1, 256])]; + tensor var_9673 = reshape(shape = var_9672, x = var_9667)[name = string("op_9673")]; + string var_9689_pad_type_0 = const()[name = string("op_9689_pad_type_0"), val = string("valid")]; + tensor var_9689_strides_0 = const()[name = string("op_9689_strides_0"), val = tensor([1, 1])]; + tensor var_9689_pad_0 = const()[name = string("op_9689_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9689_dilations_0 = const()[name = string("op_9689_dilations_0"), val = tensor([1, 1])]; + int32 var_9689_groups_0 = const()[name = string("op_9689_groups_0"), val = int32(1)]; + tensor var_9689 = conv(dilations = var_9689_dilations_0, groups = var_9689_groups_0, pad = var_9689_pad_0, pad_type = var_9689_pad_type_0, strides = var_9689_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_9651_cast_fp16)[name = string("op_9689")]; + tensor var_9694 = const()[name = string("op_9694"), val = tensor([1, 1, 1, 256])]; + tensor var_9695 = reshape(shape = var_9694, x = var_9689)[name = string("op_9695")]; + string var_9711_pad_type_0 = const()[name = string("op_9711_pad_type_0"), val = string("valid")]; + tensor var_9711_strides_0 = const()[name = string("op_9711_strides_0"), val = tensor([1, 1])]; + tensor var_9711_pad_0 = const()[name = string("op_9711_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9711_dilations_0 = const()[name = string("op_9711_dilations_0"), val = tensor([1, 1])]; + int32 var_9711_groups_0 = const()[name = string("op_9711_groups_0"), val = int32(1)]; + tensor var_9711 = conv(dilations = var_9711_dilations_0, groups = var_9711_groups_0, pad = var_9711_pad_0, pad_type = var_9711_pad_type_0, strides = var_9711_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_9651_cast_fp16)[name = string("op_9711")]; + tensor var_9716 = const()[name = string("op_9716"), val = tensor([1, 1, 1, 256])]; + tensor var_9717 = reshape(shape = var_9716, x = var_9711)[name = string("op_9717")]; + int32 var_9732 = const()[name = string("op_9732"), val = int32(-1)]; + fp16 const_498_promoted = const()[name = string("const_498_promoted"), val = fp16(-0x1p+0)]; + tensor var_9734 = mul(x = var_9673, y = const_498_promoted)[name = string("op_9734")]; + bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; + tensor input_265 = concat(axis = var_9732, interleave = input_265_interleave_0, values = (var_9673, var_9734))[name = string("input_265")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_9729_to_fp16 = const()[name = string("op_9729_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_9729_to_fp16, x = input_265)[name = string("normed_317_cast_fp16")]; + tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_319 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319")]; + tensor var_9748_to_fp16 = const()[name = string("op_9748_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639379328)))]; + tensor q_27_cast_fp16 = mul(x = normed_319, y = var_9748_to_fp16)[name = string("q_27_cast_fp16")]; + int32 var_9759 = const()[name = string("op_9759"), val = int32(-1)]; + fp16 const_502_promoted = const()[name = string("const_502_promoted"), val = fp16(-0x1p+0)]; + tensor var_9761 = mul(x = var_9695, y = const_502_promoted)[name = string("op_9761")]; + bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; + tensor input_267 = concat(axis = var_9759, interleave = input_267_interleave_0, values = (var_9695, var_9761))[name = string("input_267")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_9756_to_fp16 = const()[name = string("op_9756_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_9756_to_fp16, x = input_267)[name = string("normed_321_cast_fp16")]; + tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_323 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323")]; + tensor var_9775_to_fp16 = const()[name = string("op_9775_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639379904)))]; + tensor k_27_cast_fp16 = mul(x = normed_323, y = var_9775_to_fp16)[name = string("k_27_cast_fp16")]; + tensor var_9777_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9777_cast_fp16")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; + fp16 const_508_promoted_to_fp16 = const()[name = string("const_508_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9798_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_508_promoted_to_fp16)[name = string("op_9798_cast_fp16")]; + int32 var_9800 = const()[name = string("op_9800"), val = int32(-1)]; + bool var_9801_interleave_0 = const()[name = string("op_9801_interleave_0"), val = bool(false)]; + tensor var_9801_cast_fp16 = concat(axis = var_9800, interleave = var_9801_interleave_0, values = (var_9798_cast_fp16, x1_53_cast_fp16))[name = string("op_9801_cast_fp16")]; + tensor var_9802_cast_fp16 = mul(x = var_9801_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9802_cast_fp16")]; + tensor query_states_53_cast_fp16 = add(x = var_9777_cast_fp16, y = var_9802_cast_fp16)[name = string("query_states_53_cast_fp16")]; + tensor var_9805_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9805_cast_fp16")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; + fp16 const_511_promoted_to_fp16 = const()[name = string("const_511_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9826_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_511_promoted_to_fp16)[name = string("op_9826_cast_fp16")]; + int32 var_9828 = const()[name = string("op_9828"), val = int32(-1)]; + bool var_9829_interleave_0 = const()[name = string("op_9829_interleave_0"), val = bool(false)]; + tensor var_9829_cast_fp16 = concat(axis = var_9828, interleave = var_9829_interleave_0, values = (var_9826_cast_fp16, x1_55_cast_fp16))[name = string("op_9829_cast_fp16")]; + tensor var_9830_cast_fp16 = mul(x = var_9829_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9830_cast_fp16")]; + tensor key_states_53_cast_fp16 = add(x = var_9805_cast_fp16, y = var_9830_cast_fp16)[name = string("key_states_53_cast_fp16")]; + tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([11])]; + tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; + tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; + tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([12])]; + int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; + bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; + tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_106")]; + tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; + tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; + int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; + bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; + tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_160, concat_107_values1_0, var_1955, concat_107_values3_0))[name = string("concat_107")]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_23_stride_0, update = key_states_53_cast_fp16, x = coreml_update_state_77)[name = string("model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_26")]; + tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([33])]; + tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; + tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; + tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([34])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_166, concat_111_values1_0, var_1955, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_24_stride_0, update = var_9717, x = coreml_update_state_78)[name = string("model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_27")]; + tensor var_9885_begin_0 = const()[name = string("op_9885_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor var_9885_end_0 = const()[name = string("op_9885_end_0"), val = tensor([12, 1, 512, 256])]; + tensor var_9885_end_mask_0 = const()[name = string("op_9885_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9885_cast_fp16 = slice_by_index(begin = var_9885_begin_0, end = var_9885_end_0, end_mask = var_9885_end_mask_0, x = coreml_update_state_79)[name = string("op_9885_cast_fp16")]; + tensor var_9892_begin_0 = const()[name = string("op_9892_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_9892_end_0 = const()[name = string("op_9892_end_0"), val = tensor([34, 1, 512, 256])]; + tensor var_9892_end_mask_0 = const()[name = string("op_9892_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9892_cast_fp16 = slice_by_index(begin = var_9892_begin_0, end = var_9892_end_0, end_mask = var_9892_end_mask_0, x = coreml_update_state_79)[name = string("op_9892_cast_fp16")]; + tensor var_9929 = const()[name = string("op_9929"), val = tensor([1, 4, 1, 1])]; + tensor x_213_cast_fp16 = tile(reps = var_9929, x = var_9885_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_9949 = const()[name = string("op_9949"), val = tensor([1, 4, 1, 1])]; + tensor x_219_cast_fp16 = tile(reps = var_9949, x = var_9892_cast_fp16)[name = string("x_219_cast_fp16")]; + bool var_9976_transpose_x_1 = const()[name = string("op_9976_transpose_x_1"), val = bool(false)]; + bool var_9976_transpose_y_1 = const()[name = string("op_9976_transpose_y_1"), val = bool(true)]; + tensor var_9976 = matmul(transpose_x = var_9976_transpose_x_1, transpose_y = var_9976_transpose_y_1, x = query_states_53_cast_fp16, y = x_213_cast_fp16)[name = string("op_9976")]; + fp16 var_9977_to_fp16 = const()[name = string("op_9977_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_79_cast_fp16 = mul(x = var_9976, y = var_9977_to_fp16)[name = string("attn_weights_79_cast_fp16")]; + tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = var_2129)[name = string("attn_weights_81_cast_fp16")]; + int32 var_10012 = const()[name = string("op_10012"), val = int32(-1)]; + tensor attn_weights_83_cast_fp16 = softmax(axis = var_10012, x = attn_weights_81_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; + bool attn_output_131_transpose_x_0 = const()[name = string("attn_output_131_transpose_x_0"), val = bool(false)]; + bool attn_output_131_transpose_y_0 = const()[name = string("attn_output_131_transpose_y_0"), val = bool(false)]; + tensor attn_output_131_cast_fp16 = matmul(transpose_x = attn_output_131_transpose_x_0, transpose_y = attn_output_131_transpose_y_0, x = attn_weights_83_cast_fp16, y = x_219_cast_fp16)[name = string("attn_output_131_cast_fp16")]; + tensor var_10023_perm_0 = const()[name = string("op_10023_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10027 = const()[name = string("op_10027"), val = tensor([1, 1, 1024])]; + tensor var_10023_cast_fp16 = transpose(perm = var_10023_perm_0, x = attn_output_131_cast_fp16)[name = string("transpose_93")]; + tensor attn_output_135_cast_fp16 = reshape(shape = var_10027, x = var_10023_cast_fp16)[name = string("attn_output_135_cast_fp16")]; + tensor var_10032 = const()[name = string("op_10032"), val = tensor([0, 2, 1])]; + string var_10048_pad_type_0 = const()[name = string("op_10048_pad_type_0"), val = string("valid")]; + int32 var_10048_groups_0 = const()[name = string("op_10048_groups_0"), val = int32(1)]; + tensor var_10048_strides_0 = const()[name = string("op_10048_strides_0"), val = tensor([1])]; + tensor var_10048_pad_0 = const()[name = string("op_10048_pad_0"), val = tensor([0, 0])]; + tensor var_10048_dilations_0 = const()[name = string("op_10048_dilations_0"), val = tensor([1])]; + tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639380480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640265280))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10033_cast_fp16 = transpose(perm = var_10032, x = attn_output_135_cast_fp16)[name = string("transpose_92")]; + tensor var_10048_cast_fp16 = conv(dilations = var_10048_dilations_0, groups = var_10048_groups_0, pad = var_10048_pad_0, pad_type = var_10048_pad_type_0, strides = var_10048_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_10033_cast_fp16)[name = string("op_10048_cast_fp16")]; + tensor var_10052 = const()[name = string("op_10052"), val = tensor([0, 2, 1])]; + int32 var_10063 = const()[name = string("op_10063"), val = int32(-1)]; + fp16 const_520_promoted_to_fp16 = const()[name = string("const_520_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_221_cast_fp16 = transpose(perm = var_10052, x = var_10048_cast_fp16)[name = string("transpose_91")]; + tensor var_10065_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_520_promoted_to_fp16)[name = string("op_10065_cast_fp16")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271_cast_fp16 = concat(axis = var_10063, interleave = input_271_interleave_0, values = (hidden_states_221_cast_fp16, var_10065_cast_fp16))[name = string("input_271_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_10060_to_fp16 = const()[name = string("op_10060_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_10060_to_fp16, x = input_271_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_327_cast_fp16 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327_cast_fp16")]; + tensor var_10079_to_fp16 = const()[name = string("op_10079_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640302208)))]; + tensor attn_output_139_cast_fp16 = mul(x = normed_327_cast_fp16, y = var_10079_to_fp16)[name = string("attn_output_139_cast_fp16")]; + tensor hidden_states_223_cast_fp16 = add(x = hidden_states_213_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_223_cast_fp16")]; + int32 var_10092 = const()[name = string("op_10092"), val = int32(-1)]; + fp16 const_524_promoted_to_fp16 = const()[name = string("const_524_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10094_cast_fp16 = mul(x = hidden_states_223_cast_fp16, y = const_524_promoted_to_fp16)[name = string("op_10094_cast_fp16")]; + bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; + tensor input_273_cast_fp16 = concat(axis = var_10092, interleave = input_273_interleave_0, values = (hidden_states_223_cast_fp16, var_10094_cast_fp16))[name = string("input_273_cast_fp16")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_10089_to_fp16 = const()[name = string("op_10089_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_10089_to_fp16, x = input_273_cast_fp16)[name = string("normed_329_cast_fp16")]; + tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_331_cast_fp16 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331_cast_fp16")]; + tensor var_10108_to_fp16 = const()[name = string("op_10108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640304576)))]; + tensor x_221_cast_fp16 = mul(x = normed_331_cast_fp16, y = var_10108_to_fp16)[name = string("x_221_cast_fp16")]; + tensor var_10120 = const()[name = string("op_10120"), val = tensor([0, 2, 1])]; + tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; + tensor var_10121_cast_fp16 = transpose(perm = var_10120, x = x_221_cast_fp16)[name = string("transpose_90")]; + tensor input_275_cast_fp16 = expand_dims(axes = input_275_axes_0, x = var_10121_cast_fp16)[name = string("input_275_cast_fp16")]; + string x_223_pad_type_0 = const()[name = string("x_223_pad_type_0"), val = string("valid")]; + tensor x_223_strides_0 = const()[name = string("x_223_strides_0"), val = tensor([1, 1])]; + tensor x_223_pad_0 = const()[name = string("x_223_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_223_dilations_0 = const()[name = string("x_223_dilations_0"), val = tensor([1, 1])]; + int32 x_223_groups_0 = const()[name = string("x_223_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640306944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646278976))))[name = string("model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_223_cast_fp16 = conv(dilations = x_223_dilations_0, groups = x_223_groups_0, pad = x_223_pad_0, pad_type = x_223_pad_type_0, strides = x_223_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("x_223_cast_fp16")]; + string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; + tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; + tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; + int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646500224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652472256))))[name = string("model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_27_cast_fp16 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("b_27_cast_fp16")]; + string var_10146_mode_0 = const()[name = string("op_10146_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_10146_cast_fp16 = gelu(mode = var_10146_mode_0, x = x_223_cast_fp16)[name = string("op_10146_cast_fp16")]; + tensor input_277_cast_fp16 = mul(x = var_10146_cast_fp16, y = b_27_cast_fp16)[name = string("input_277_cast_fp16")]; + string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; + tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; + tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; + int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652693504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658665536))))[name = string("model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_27_cast_fp16 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_277_cast_fp16)[name = string("e_27_cast_fp16")]; + tensor var_10154_axes_0 = const()[name = string("op_10154_axes_0"), val = tensor([2])]; + tensor var_10154_cast_fp16 = squeeze(axes = var_10154_axes_0, x = e_27_cast_fp16)[name = string("op_10154_cast_fp16")]; + tensor var_10155 = const()[name = string("op_10155"), val = tensor([0, 2, 1])]; + int32 var_10166 = const()[name = string("op_10166"), val = int32(-1)]; + fp16 const_528_promoted_to_fp16 = const()[name = string("const_528_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_225_cast_fp16 = transpose(perm = var_10155, x = var_10154_cast_fp16)[name = string("transpose_89")]; + tensor var_10168_cast_fp16 = mul(x = hidden_states_225_cast_fp16, y = const_528_promoted_to_fp16)[name = string("op_10168_cast_fp16")]; + bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; + tensor input_279_cast_fp16 = concat(axis = var_10166, interleave = input_279_interleave_0, values = (hidden_states_225_cast_fp16, var_10168_cast_fp16))[name = string("input_279_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_10163_to_fp16 = const()[name = string("op_10163_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_10163_to_fp16, x = input_279_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; + tensor var_10182_to_fp16 = const()[name = string("op_10182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658702464)))]; + tensor hidden_states_227_cast_fp16 = mul(x = normed_335_cast_fp16, y = var_10182_to_fp16)[name = string("hidden_states_227_cast_fp16")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_223_cast_fp16, y = hidden_states_227_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + int32 var_10233 = const()[name = string("op_10233"), val = int32(-1)]; + fp16 const_532_promoted_to_fp16 = const()[name = string("const_532_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10235_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = const_532_promoted_to_fp16)[name = string("op_10235_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_10233, interleave = input_281_interleave_0, values = (hidden_states_229_cast_fp16, var_10235_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_10230_to_fp16 = const()[name = string("op_10230_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_10230_to_fp16, x = input_281_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; + tensor var_10249_to_fp16 = const()[name = string("op_10249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658704832)))]; + tensor hidden_states_231_cast_fp16 = mul(x = normed_339_cast_fp16, y = var_10249_to_fp16)[name = string("hidden_states_231_cast_fp16")]; + tensor var_10254 = const()[name = string("op_10254"), val = tensor([0, 2, 1])]; + tensor var_10257_axes_0 = const()[name = string("op_10257_axes_0"), val = tensor([2])]; + tensor var_10255_cast_fp16 = transpose(perm = var_10254, x = hidden_states_231_cast_fp16)[name = string("transpose_88")]; + tensor var_10257_cast_fp16 = expand_dims(axes = var_10257_axes_0, x = var_10255_cast_fp16)[name = string("op_10257_cast_fp16")]; + string var_10273_pad_type_0 = const()[name = string("op_10273_pad_type_0"), val = string("valid")]; + tensor var_10273_strides_0 = const()[name = string("op_10273_strides_0"), val = tensor([1, 1])]; + tensor var_10273_pad_0 = const()[name = string("op_10273_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10273_dilations_0 = const()[name = string("op_10273_dilations_0"), val = tensor([1, 1])]; + int32 var_10273_groups_0 = const()[name = string("op_10273_groups_0"), val = int32(1)]; + tensor var_10273 = conv(dilations = var_10273_dilations_0, groups = var_10273_groups_0, pad = var_10273_pad_0, pad_type = var_10273_pad_type_0, strides = var_10273_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_10257_cast_fp16)[name = string("op_10273")]; + tensor var_10278 = const()[name = string("op_10278"), val = tensor([1, 4, 1, 256])]; + tensor var_10279 = reshape(shape = var_10278, x = var_10273)[name = string("op_10279")]; + string var_10295_pad_type_0 = const()[name = string("op_10295_pad_type_0"), val = string("valid")]; + tensor var_10295_strides_0 = const()[name = string("op_10295_strides_0"), val = tensor([1, 1])]; + tensor var_10295_pad_0 = const()[name = string("op_10295_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10295_dilations_0 = const()[name = string("op_10295_dilations_0"), val = tensor([1, 1])]; + int32 var_10295_groups_0 = const()[name = string("op_10295_groups_0"), val = int32(1)]; + tensor var_10295 = conv(dilations = var_10295_dilations_0, groups = var_10295_groups_0, pad = var_10295_pad_0, pad_type = var_10295_pad_type_0, strides = var_10295_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_10257_cast_fp16)[name = string("op_10295")]; + tensor var_10300 = const()[name = string("op_10300"), val = tensor([1, 1, 1, 256])]; + tensor var_10301 = reshape(shape = var_10300, x = var_10295)[name = string("op_10301")]; + string var_10317_pad_type_0 = const()[name = string("op_10317_pad_type_0"), val = string("valid")]; + tensor var_10317_strides_0 = const()[name = string("op_10317_strides_0"), val = tensor([1, 1])]; + tensor var_10317_pad_0 = const()[name = string("op_10317_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10317_dilations_0 = const()[name = string("op_10317_dilations_0"), val = tensor([1, 1])]; + int32 var_10317_groups_0 = const()[name = string("op_10317_groups_0"), val = int32(1)]; + tensor var_10317 = conv(dilations = var_10317_dilations_0, groups = var_10317_groups_0, pad = var_10317_pad_0, pad_type = var_10317_pad_type_0, strides = var_10317_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_10257_cast_fp16)[name = string("op_10317")]; + tensor var_10322 = const()[name = string("op_10322"), val = tensor([1, 1, 1, 256])]; + tensor var_10323 = reshape(shape = var_10322, x = var_10317)[name = string("op_10323")]; + int32 var_10338 = const()[name = string("op_10338"), val = int32(-1)]; + fp16 const_536_promoted = const()[name = string("const_536_promoted"), val = fp16(-0x1p+0)]; + tensor var_10340 = mul(x = var_10279, y = const_536_promoted)[name = string("op_10340")]; + bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; + tensor input_285 = concat(axis = var_10338, interleave = input_285_interleave_0, values = (var_10279, var_10340))[name = string("input_285")]; + tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; + fp16 var_10335_to_fp16 = const()[name = string("op_10335_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_10335_to_fp16, x = input_285)[name = string("normed_341_cast_fp16")]; + tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; + tensor var_10354_to_fp16 = const()[name = string("op_10354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658707200)))]; + tensor q_29_cast_fp16 = mul(x = normed_343, y = var_10354_to_fp16)[name = string("q_29_cast_fp16")]; + int32 var_10365 = const()[name = string("op_10365"), val = int32(-1)]; + fp16 const_540_promoted = const()[name = string("const_540_promoted"), val = fp16(-0x1p+0)]; + tensor var_10367 = mul(x = var_10301, y = const_540_promoted)[name = string("op_10367")]; + bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; + tensor input_287 = concat(axis = var_10365, interleave = input_287_interleave_0, values = (var_10301, var_10367))[name = string("input_287")]; + tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; + fp16 var_10362_to_fp16 = const()[name = string("op_10362_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_10362_to_fp16, x = input_287)[name = string("normed_345_cast_fp16")]; + tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; + tensor var_10381_to_fp16 = const()[name = string("op_10381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658707776)))]; + tensor k_29_cast_fp16 = mul(x = normed_347, y = var_10381_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_10383_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10383_cast_fp16")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; + fp16 const_546_promoted_to_fp16 = const()[name = string("const_546_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10404_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_546_promoted_to_fp16)[name = string("op_10404_cast_fp16")]; + int32 var_10406 = const()[name = string("op_10406"), val = int32(-1)]; + bool var_10407_interleave_0 = const()[name = string("op_10407_interleave_0"), val = bool(false)]; + tensor var_10407_cast_fp16 = concat(axis = var_10406, interleave = var_10407_interleave_0, values = (var_10404_cast_fp16, x1_57_cast_fp16))[name = string("op_10407_cast_fp16")]; + tensor var_10408_cast_fp16 = mul(x = var_10407_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10408_cast_fp16")]; + tensor query_states_57_cast_fp16 = add(x = var_10383_cast_fp16, y = var_10408_cast_fp16)[name = string("query_states_57_cast_fp16")]; + tensor var_10411_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10411_cast_fp16")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; + fp16 const_549_promoted_to_fp16 = const()[name = string("const_549_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10432_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_549_promoted_to_fp16)[name = string("op_10432_cast_fp16")]; + int32 var_10434 = const()[name = string("op_10434"), val = int32(-1)]; + bool var_10435_interleave_0 = const()[name = string("op_10435_interleave_0"), val = bool(false)]; + tensor var_10435_cast_fp16 = concat(axis = var_10434, interleave = var_10435_interleave_0, values = (var_10432_cast_fp16, x1_59_cast_fp16))[name = string("op_10435_cast_fp16")]; + tensor var_10436_cast_fp16 = mul(x = var_10435_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10436_cast_fp16")]; + tensor key_states_57_cast_fp16 = add(x = var_10411_cast_fp16, y = var_10436_cast_fp16)[name = string("key_states_57_cast_fp16")]; + tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([12])]; + tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; + tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; + tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([13])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_172, concat_115_values1_0, var_1955, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_25_stride_0, update = key_states_57_cast_fp16, x = coreml_update_state_79)[name = string("model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_28")]; + tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([34])]; + tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; + tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([35])]; + int32 concat_118_axis_0 = const()[name = string("concat_118_axis_0"), val = int32(0)]; + bool concat_118_interleave_0 = const()[name = string("concat_118_interleave_0"), val = bool(false)]; + tensor concat_118 = concat(axis = concat_118_axis_0, interleave = concat_118_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_118")]; + tensor concat_119_values1_0 = const()[name = string("concat_119_values1_0"), val = tensor([0])]; + tensor concat_119_values3_0 = const()[name = string("concat_119_values3_0"), val = tensor([0])]; + int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; + bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; + tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (expand_dims_178, concat_119_values1_0, var_1955, concat_119_values3_0))[name = string("concat_119")]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_26_stride_0, update = var_10323, x = coreml_update_state_80)[name = string("model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_29")]; + tensor var_10491_begin_0 = const()[name = string("op_10491_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor var_10491_end_0 = const()[name = string("op_10491_end_0"), val = tensor([13, 1, 512, 256])]; + tensor var_10491_end_mask_0 = const()[name = string("op_10491_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10491_cast_fp16 = slice_by_index(begin = var_10491_begin_0, end = var_10491_end_0, end_mask = var_10491_end_mask_0, x = coreml_update_state_81)[name = string("op_10491_cast_fp16")]; + tensor var_10498_begin_0 = const()[name = string("op_10498_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_10498_end_0 = const()[name = string("op_10498_end_0"), val = tensor([35, 1, 512, 256])]; + tensor var_10498_end_mask_0 = const()[name = string("op_10498_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10498_cast_fp16 = slice_by_index(begin = var_10498_begin_0, end = var_10498_end_0, end_mask = var_10498_end_mask_0, x = coreml_update_state_81)[name = string("op_10498_cast_fp16")]; + tensor var_10535 = const()[name = string("op_10535"), val = tensor([1, 4, 1, 1])]; + tensor x_229_cast_fp16 = tile(reps = var_10535, x = var_10491_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_10555 = const()[name = string("op_10555"), val = tensor([1, 4, 1, 1])]; + tensor x_235_cast_fp16 = tile(reps = var_10555, x = var_10498_cast_fp16)[name = string("x_235_cast_fp16")]; + bool var_10582_transpose_x_1 = const()[name = string("op_10582_transpose_x_1"), val = bool(false)]; + bool var_10582_transpose_y_1 = const()[name = string("op_10582_transpose_y_1"), val = bool(true)]; + tensor var_10582 = matmul(transpose_x = var_10582_transpose_x_1, transpose_y = var_10582_transpose_y_1, x = query_states_57_cast_fp16, y = x_229_cast_fp16)[name = string("op_10582")]; + fp16 var_10583_to_fp16 = const()[name = string("op_10583_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_85_cast_fp16 = mul(x = var_10582, y = var_10583_to_fp16)[name = string("attn_weights_85_cast_fp16")]; + tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = var_2129)[name = string("attn_weights_87_cast_fp16")]; + int32 var_10618 = const()[name = string("op_10618"), val = int32(-1)]; + tensor attn_weights_89_cast_fp16 = softmax(axis = var_10618, x = attn_weights_87_cast_fp16)[name = string("attn_weights_89_cast_fp16")]; + bool attn_output_141_transpose_x_0 = const()[name = string("attn_output_141_transpose_x_0"), val = bool(false)]; + bool attn_output_141_transpose_y_0 = const()[name = string("attn_output_141_transpose_y_0"), val = bool(false)]; + tensor attn_output_141_cast_fp16 = matmul(transpose_x = attn_output_141_transpose_x_0, transpose_y = attn_output_141_transpose_y_0, x = attn_weights_89_cast_fp16, y = x_235_cast_fp16)[name = string("attn_output_141_cast_fp16")]; + tensor var_10629_perm_0 = const()[name = string("op_10629_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10633 = const()[name = string("op_10633"), val = tensor([1, 1, 1024])]; + tensor var_10629_cast_fp16 = transpose(perm = var_10629_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_87")]; + tensor attn_output_145_cast_fp16 = reshape(shape = var_10633, x = var_10629_cast_fp16)[name = string("attn_output_145_cast_fp16")]; + tensor var_10638 = const()[name = string("op_10638"), val = tensor([0, 2, 1])]; + string var_10654_pad_type_0 = const()[name = string("op_10654_pad_type_0"), val = string("valid")]; + int32 var_10654_groups_0 = const()[name = string("op_10654_groups_0"), val = int32(1)]; + tensor var_10654_strides_0 = const()[name = string("op_10654_strides_0"), val = tensor([1])]; + tensor var_10654_pad_0 = const()[name = string("op_10654_pad_0"), val = tensor([0, 0])]; + tensor var_10654_dilations_0 = const()[name = string("op_10654_dilations_0"), val = tensor([1])]; + tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658708352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659593152))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10639_cast_fp16 = transpose(perm = var_10638, x = attn_output_145_cast_fp16)[name = string("transpose_86")]; + tensor var_10654_cast_fp16 = conv(dilations = var_10654_dilations_0, groups = var_10654_groups_0, pad = var_10654_pad_0, pad_type = var_10654_pad_type_0, strides = var_10654_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_10639_cast_fp16)[name = string("op_10654_cast_fp16")]; + tensor var_10658 = const()[name = string("op_10658"), val = tensor([0, 2, 1])]; + int32 var_10669 = const()[name = string("op_10669"), val = int32(-1)]; + fp16 const_558_promoted_to_fp16 = const()[name = string("const_558_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_237_cast_fp16 = transpose(perm = var_10658, x = var_10654_cast_fp16)[name = string("transpose_85")]; + tensor var_10671_cast_fp16 = mul(x = hidden_states_237_cast_fp16, y = const_558_promoted_to_fp16)[name = string("op_10671_cast_fp16")]; + bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; + tensor input_291_cast_fp16 = concat(axis = var_10669, interleave = input_291_interleave_0, values = (hidden_states_237_cast_fp16, var_10671_cast_fp16))[name = string("input_291_cast_fp16")]; + tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; + fp16 var_10666_to_fp16 = const()[name = string("op_10666_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_10666_to_fp16, x = input_291_cast_fp16)[name = string("normed_349_cast_fp16")]; + tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; + tensor var_10685_to_fp16 = const()[name = string("op_10685_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659630080)))]; + tensor attn_output_149_cast_fp16 = mul(x = normed_351_cast_fp16, y = var_10685_to_fp16)[name = string("attn_output_149_cast_fp16")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + int32 var_10698 = const()[name = string("op_10698"), val = int32(-1)]; + fp16 const_562_promoted_to_fp16 = const()[name = string("const_562_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10700_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = const_562_promoted_to_fp16)[name = string("op_10700_cast_fp16")]; + bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; + tensor input_293_cast_fp16 = concat(axis = var_10698, interleave = input_293_interleave_0, values = (hidden_states_239_cast_fp16, var_10700_cast_fp16))[name = string("input_293_cast_fp16")]; + tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; + fp16 var_10695_to_fp16 = const()[name = string("op_10695_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_10695_to_fp16, x = input_293_cast_fp16)[name = string("normed_353_cast_fp16")]; + tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; + tensor var_10714_to_fp16 = const()[name = string("op_10714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659632448)))]; + tensor x_237_cast_fp16 = mul(x = normed_355_cast_fp16, y = var_10714_to_fp16)[name = string("x_237_cast_fp16")]; + tensor var_10726 = const()[name = string("op_10726"), val = tensor([0, 2, 1])]; + tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; + tensor var_10727_cast_fp16 = transpose(perm = var_10726, x = x_237_cast_fp16)[name = string("transpose_84")]; + tensor input_295_cast_fp16 = expand_dims(axes = input_295_axes_0, x = var_10727_cast_fp16)[name = string("input_295_cast_fp16")]; + string x_239_pad_type_0 = const()[name = string("x_239_pad_type_0"), val = string("valid")]; + tensor x_239_strides_0 = const()[name = string("x_239_strides_0"), val = tensor([1, 1])]; + tensor x_239_pad_0 = const()[name = string("x_239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_239_dilations_0 = const()[name = string("x_239_dilations_0"), val = tensor([1, 1])]; + int32 x_239_groups_0 = const()[name = string("x_239_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659634816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665606848))))[name = string("model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_239_cast_fp16 = conv(dilations = x_239_dilations_0, groups = x_239_groups_0, pad = x_239_pad_0, pad_type = x_239_pad_type_0, strides = x_239_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("x_239_cast_fp16")]; + string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; + tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; + tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; + int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665828096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671800128))))[name = string("model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_29_cast_fp16 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("b_29_cast_fp16")]; + string var_10752_mode_0 = const()[name = string("op_10752_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_10752_cast_fp16 = gelu(mode = var_10752_mode_0, x = x_239_cast_fp16)[name = string("op_10752_cast_fp16")]; + tensor input_297_cast_fp16 = mul(x = var_10752_cast_fp16, y = b_29_cast_fp16)[name = string("input_297_cast_fp16")]; + string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; + tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; + tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; + int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(672021376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677993408))))[name = string("model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_29_cast_fp16 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("e_29_cast_fp16")]; + tensor var_10760_axes_0 = const()[name = string("op_10760_axes_0"), val = tensor([2])]; + tensor var_10760_cast_fp16 = squeeze(axes = var_10760_axes_0, x = e_29_cast_fp16)[name = string("op_10760_cast_fp16")]; + tensor var_10761 = const()[name = string("op_10761"), val = tensor([0, 2, 1])]; + int32 var_10772 = const()[name = string("op_10772"), val = int32(-1)]; + fp16 const_566_promoted_to_fp16 = const()[name = string("const_566_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_241_cast_fp16 = transpose(perm = var_10761, x = var_10760_cast_fp16)[name = string("transpose_83")]; + tensor var_10774_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_566_promoted_to_fp16)[name = string("op_10774_cast_fp16")]; + bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; + tensor input_299_cast_fp16 = concat(axis = var_10772, interleave = input_299_interleave_0, values = (hidden_states_241_cast_fp16, var_10774_cast_fp16))[name = string("input_299_cast_fp16")]; + tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; + fp16 var_10769_to_fp16 = const()[name = string("op_10769_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_10769_to_fp16, x = input_299_cast_fp16)[name = string("normed_357_cast_fp16")]; + tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_359_cast_fp16 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359_cast_fp16")]; + tensor var_10788_to_fp16 = const()[name = string("op_10788_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678030336)))]; + tensor hidden_states_243_cast_fp16 = mul(x = normed_359_cast_fp16, y = var_10788_to_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor hidden_states_245_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = hidden_states_243_cast_fp16)[name = string("hidden_states_245_cast_fp16")]; + int32 var_10839 = const()[name = string("op_10839"), val = int32(-1)]; + fp16 const_570_promoted_to_fp16 = const()[name = string("const_570_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10841_cast_fp16 = mul(x = hidden_states_245_cast_fp16, y = const_570_promoted_to_fp16)[name = string("op_10841_cast_fp16")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301_cast_fp16 = concat(axis = var_10839, interleave = input_301_interleave_0, values = (hidden_states_245_cast_fp16, var_10841_cast_fp16))[name = string("input_301_cast_fp16")]; + tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; + fp16 var_10836_to_fp16 = const()[name = string("op_10836_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_10836_to_fp16, x = input_301_cast_fp16)[name = string("normed_361_cast_fp16")]; + tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_363_cast_fp16 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363_cast_fp16")]; + tensor var_10855_to_fp16 = const()[name = string("op_10855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678032704)))]; + tensor hidden_states_247_cast_fp16 = mul(x = normed_363_cast_fp16, y = var_10855_to_fp16)[name = string("hidden_states_247_cast_fp16")]; + tensor var_10860 = const()[name = string("op_10860"), val = tensor([0, 2, 1])]; + tensor var_10863_axes_0 = const()[name = string("op_10863_axes_0"), val = tensor([2])]; + tensor var_10861_cast_fp16 = transpose(perm = var_10860, x = hidden_states_247_cast_fp16)[name = string("transpose_82")]; + tensor var_10863_cast_fp16 = expand_dims(axes = var_10863_axes_0, x = var_10861_cast_fp16)[name = string("op_10863_cast_fp16")]; + string var_10879_pad_type_0 = const()[name = string("op_10879_pad_type_0"), val = string("valid")]; + tensor var_10879_strides_0 = const()[name = string("op_10879_strides_0"), val = tensor([1, 1])]; + tensor var_10879_pad_0 = const()[name = string("op_10879_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10879_dilations_0 = const()[name = string("op_10879_dilations_0"), val = tensor([1, 1])]; + int32 var_10879_groups_0 = const()[name = string("op_10879_groups_0"), val = int32(1)]; + tensor var_10879 = conv(dilations = var_10879_dilations_0, groups = var_10879_groups_0, pad = var_10879_pad_0, pad_type = var_10879_pad_type_0, strides = var_10879_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_10863_cast_fp16)[name = string("op_10879")]; + tensor var_10884 = const()[name = string("op_10884"), val = tensor([1, 4, 1, 256])]; + tensor var_10885 = reshape(shape = var_10884, x = var_10879)[name = string("op_10885")]; + string var_10901_pad_type_0 = const()[name = string("op_10901_pad_type_0"), val = string("valid")]; + tensor var_10901_strides_0 = const()[name = string("op_10901_strides_0"), val = tensor([1, 1])]; + tensor var_10901_pad_0 = const()[name = string("op_10901_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10901_dilations_0 = const()[name = string("op_10901_dilations_0"), val = tensor([1, 1])]; + int32 var_10901_groups_0 = const()[name = string("op_10901_groups_0"), val = int32(1)]; + tensor var_10901 = conv(dilations = var_10901_dilations_0, groups = var_10901_groups_0, pad = var_10901_pad_0, pad_type = var_10901_pad_type_0, strides = var_10901_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_10863_cast_fp16)[name = string("op_10901")]; + tensor var_10906 = const()[name = string("op_10906"), val = tensor([1, 1, 1, 256])]; + tensor var_10907 = reshape(shape = var_10906, x = var_10901)[name = string("op_10907")]; + string var_10923_pad_type_0 = const()[name = string("op_10923_pad_type_0"), val = string("valid")]; + tensor var_10923_strides_0 = const()[name = string("op_10923_strides_0"), val = tensor([1, 1])]; + tensor var_10923_pad_0 = const()[name = string("op_10923_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10923_dilations_0 = const()[name = string("op_10923_dilations_0"), val = tensor([1, 1])]; + int32 var_10923_groups_0 = const()[name = string("op_10923_groups_0"), val = int32(1)]; + tensor var_10923 = conv(dilations = var_10923_dilations_0, groups = var_10923_groups_0, pad = var_10923_pad_0, pad_type = var_10923_pad_type_0, strides = var_10923_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_10863_cast_fp16)[name = string("op_10923")]; + tensor var_10928 = const()[name = string("op_10928"), val = tensor([1, 1, 1, 256])]; + tensor var_10929 = reshape(shape = var_10928, x = var_10923)[name = string("op_10929")]; + int32 var_10944 = const()[name = string("op_10944"), val = int32(-1)]; + fp16 const_574_promoted = const()[name = string("const_574_promoted"), val = fp16(-0x1p+0)]; + tensor var_10946 = mul(x = var_10885, y = const_574_promoted)[name = string("op_10946")]; + bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; + tensor input_305 = concat(axis = var_10944, interleave = input_305_interleave_0, values = (var_10885, var_10946))[name = string("input_305")]; + tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; + fp16 var_10941_to_fp16 = const()[name = string("op_10941_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_10941_to_fp16, x = input_305)[name = string("normed_365_cast_fp16")]; + tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_367 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367")]; + tensor var_10960_to_fp16 = const()[name = string("op_10960_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678035072)))]; + tensor q_31_cast_fp16 = mul(x = normed_367, y = var_10960_to_fp16)[name = string("q_31_cast_fp16")]; + int32 var_10971 = const()[name = string("op_10971"), val = int32(-1)]; + fp16 const_578_promoted = const()[name = string("const_578_promoted"), val = fp16(-0x1p+0)]; + tensor var_10973 = mul(x = var_10907, y = const_578_promoted)[name = string("op_10973")]; + bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; + tensor input_307 = concat(axis = var_10971, interleave = input_307_interleave_0, values = (var_10907, var_10973))[name = string("input_307")]; + tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; + fp16 var_10968_to_fp16 = const()[name = string("op_10968_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_10968_to_fp16, x = input_307)[name = string("normed_369_cast_fp16")]; + tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_371 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371")]; + tensor var_10987_to_fp16 = const()[name = string("op_10987_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678035648)))]; + tensor k_31_cast_fp16 = mul(x = normed_371, y = var_10987_to_fp16)[name = string("k_31_cast_fp16")]; + tensor var_10989_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10989_cast_fp16")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; + fp16 const_584_promoted_to_fp16 = const()[name = string("const_584_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11010_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_584_promoted_to_fp16)[name = string("op_11010_cast_fp16")]; + int32 var_11012 = const()[name = string("op_11012"), val = int32(-1)]; + bool var_11013_interleave_0 = const()[name = string("op_11013_interleave_0"), val = bool(false)]; + tensor var_11013_cast_fp16 = concat(axis = var_11012, interleave = var_11013_interleave_0, values = (var_11010_cast_fp16, x1_61_cast_fp16))[name = string("op_11013_cast_fp16")]; + tensor var_11014_cast_fp16 = mul(x = var_11013_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11014_cast_fp16")]; + tensor query_states_61_cast_fp16 = add(x = var_10989_cast_fp16, y = var_11014_cast_fp16)[name = string("query_states_61_cast_fp16")]; + tensor var_11017_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11017_cast_fp16")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; + fp16 const_587_promoted_to_fp16 = const()[name = string("const_587_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11038_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_587_promoted_to_fp16)[name = string("op_11038_cast_fp16")]; + int32 var_11040 = const()[name = string("op_11040"), val = int32(-1)]; + bool var_11041_interleave_0 = const()[name = string("op_11041_interleave_0"), val = bool(false)]; + tensor var_11041_cast_fp16 = concat(axis = var_11040, interleave = var_11041_interleave_0, values = (var_11038_cast_fp16, x1_63_cast_fp16))[name = string("op_11041_cast_fp16")]; + tensor var_11042_cast_fp16 = mul(x = var_11041_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11042_cast_fp16")]; + tensor key_states_61_cast_fp16 = add(x = var_11017_cast_fp16, y = var_11042_cast_fp16)[name = string("key_states_61_cast_fp16")]; + tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([13])]; + tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; + tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; + tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([14])]; + int32 concat_122_axis_0 = const()[name = string("concat_122_axis_0"), val = int32(0)]; + bool concat_122_interleave_0 = const()[name = string("concat_122_interleave_0"), val = bool(false)]; + tensor concat_122 = concat(axis = concat_122_axis_0, interleave = concat_122_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_122")]; + tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; + tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; + int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; + bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; + tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_184, concat_123_values1_0, var_1955, concat_123_values3_0))[name = string("concat_123")]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_122, begin_mask = model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0, end = concat_123, end_mask = model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_27_stride_0, update = key_states_61_cast_fp16, x = coreml_update_state_81)[name = string("model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_30")]; + tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([35])]; + tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; + tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; + tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([36])]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_126")]; + tensor concat_127_values1_0 = const()[name = string("concat_127_values1_0"), val = tensor([0])]; + tensor concat_127_values3_0 = const()[name = string("concat_127_values3_0"), val = tensor([0])]; + int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; + bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; + tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (expand_dims_190, concat_127_values1_0, var_1955, concat_127_values3_0))[name = string("concat_127")]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_126, begin_mask = model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0, end = concat_127, end_mask = model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_28_stride_0, update = var_10929, x = coreml_update_state_82)[name = string("model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_31")]; + tensor var_11097_begin_0 = const()[name = string("op_11097_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor var_11097_end_0 = const()[name = string("op_11097_end_0"), val = tensor([14, 1, 512, 256])]; + tensor var_11097_end_mask_0 = const()[name = string("op_11097_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11097_cast_fp16 = slice_by_index(begin = var_11097_begin_0, end = var_11097_end_0, end_mask = var_11097_end_mask_0, x = coreml_update_state_83)[name = string("op_11097_cast_fp16")]; + tensor var_11104_begin_0 = const()[name = string("op_11104_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_11104_end_0 = const()[name = string("op_11104_end_0"), val = tensor([36, 1, 512, 256])]; + tensor var_11104_end_mask_0 = const()[name = string("op_11104_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11104_cast_fp16 = slice_by_index(begin = var_11104_begin_0, end = var_11104_end_0, end_mask = var_11104_end_mask_0, x = coreml_update_state_83)[name = string("op_11104_cast_fp16")]; + tensor var_11141 = const()[name = string("op_11141"), val = tensor([1, 4, 1, 1])]; + tensor x_245_cast_fp16 = tile(reps = var_11141, x = var_11097_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_11161 = const()[name = string("op_11161"), val = tensor([1, 4, 1, 1])]; + tensor x_251_cast_fp16 = tile(reps = var_11161, x = var_11104_cast_fp16)[name = string("x_251_cast_fp16")]; + bool var_11188_transpose_x_1 = const()[name = string("op_11188_transpose_x_1"), val = bool(false)]; + bool var_11188_transpose_y_1 = const()[name = string("op_11188_transpose_y_1"), val = bool(true)]; + tensor var_11188 = matmul(transpose_x = var_11188_transpose_x_1, transpose_y = var_11188_transpose_y_1, x = query_states_61_cast_fp16, y = x_245_cast_fp16)[name = string("op_11188")]; + fp16 var_11189_to_fp16 = const()[name = string("op_11189_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_91_cast_fp16 = mul(x = var_11188, y = var_11189_to_fp16)[name = string("attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = var_2129)[name = string("attn_weights_93_cast_fp16")]; + int32 var_11224 = const()[name = string("op_11224"), val = int32(-1)]; + tensor attn_weights_95_cast_fp16 = softmax(axis = var_11224, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; + bool attn_output_151_transpose_x_0 = const()[name = string("attn_output_151_transpose_x_0"), val = bool(false)]; + bool attn_output_151_transpose_y_0 = const()[name = string("attn_output_151_transpose_y_0"), val = bool(false)]; + tensor attn_output_151_cast_fp16 = matmul(transpose_x = attn_output_151_transpose_x_0, transpose_y = attn_output_151_transpose_y_0, x = attn_weights_95_cast_fp16, y = x_251_cast_fp16)[name = string("attn_output_151_cast_fp16")]; + tensor var_11235_perm_0 = const()[name = string("op_11235_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11239 = const()[name = string("op_11239"), val = tensor([1, 1, 1024])]; + tensor var_11235_cast_fp16 = transpose(perm = var_11235_perm_0, x = attn_output_151_cast_fp16)[name = string("transpose_81")]; + tensor attn_output_155_cast_fp16 = reshape(shape = var_11239, x = var_11235_cast_fp16)[name = string("attn_output_155_cast_fp16")]; + tensor var_11244 = const()[name = string("op_11244"), val = tensor([0, 2, 1])]; + string var_11260_pad_type_0 = const()[name = string("op_11260_pad_type_0"), val = string("valid")]; + int32 var_11260_groups_0 = const()[name = string("op_11260_groups_0"), val = int32(1)]; + tensor var_11260_strides_0 = const()[name = string("op_11260_strides_0"), val = tensor([1])]; + tensor var_11260_pad_0 = const()[name = string("op_11260_pad_0"), val = tensor([0, 0])]; + tensor var_11260_dilations_0 = const()[name = string("op_11260_dilations_0"), val = tensor([1])]; + tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678036224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678921024))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11245_cast_fp16 = transpose(perm = var_11244, x = attn_output_155_cast_fp16)[name = string("transpose_80")]; + tensor var_11260_cast_fp16 = conv(dilations = var_11260_dilations_0, groups = var_11260_groups_0, pad = var_11260_pad_0, pad_type = var_11260_pad_type_0, strides = var_11260_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_11245_cast_fp16)[name = string("op_11260_cast_fp16")]; + tensor var_11264 = const()[name = string("op_11264"), val = tensor([0, 2, 1])]; + int32 var_11275 = const()[name = string("op_11275"), val = int32(-1)]; + fp16 const_596_promoted_to_fp16 = const()[name = string("const_596_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_253_cast_fp16 = transpose(perm = var_11264, x = var_11260_cast_fp16)[name = string("transpose_79")]; + tensor var_11277_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = const_596_promoted_to_fp16)[name = string("op_11277_cast_fp16")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311_cast_fp16 = concat(axis = var_11275, interleave = input_311_interleave_0, values = (hidden_states_253_cast_fp16, var_11277_cast_fp16))[name = string("input_311_cast_fp16")]; + tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; + fp16 var_11272_to_fp16 = const()[name = string("op_11272_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_11272_to_fp16, x = input_311_cast_fp16)[name = string("normed_373_cast_fp16")]; + tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_375_cast_fp16 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375_cast_fp16")]; + tensor var_11291_to_fp16 = const()[name = string("op_11291_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678957952)))]; + tensor attn_output_159_cast_fp16 = mul(x = normed_375_cast_fp16, y = var_11291_to_fp16)[name = string("attn_output_159_cast_fp16")]; + tensor hidden_states_255_cast_fp16 = add(x = hidden_states_245_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; + int32 var_11304 = const()[name = string("op_11304"), val = int32(-1)]; + fp16 const_600_promoted_to_fp16 = const()[name = string("const_600_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11306_cast_fp16 = mul(x = hidden_states_255_cast_fp16, y = const_600_promoted_to_fp16)[name = string("op_11306_cast_fp16")]; + bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; + tensor input_313_cast_fp16 = concat(axis = var_11304, interleave = input_313_interleave_0, values = (hidden_states_255_cast_fp16, var_11306_cast_fp16))[name = string("input_313_cast_fp16")]; + tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; + fp16 var_11301_to_fp16 = const()[name = string("op_11301_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_11301_to_fp16, x = input_313_cast_fp16)[name = string("normed_377_cast_fp16")]; + tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_379_cast_fp16 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379_cast_fp16")]; + tensor var_11320_to_fp16 = const()[name = string("op_11320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678960320)))]; + tensor x_253_cast_fp16 = mul(x = normed_379_cast_fp16, y = var_11320_to_fp16)[name = string("x_253_cast_fp16")]; + tensor var_11332 = const()[name = string("op_11332"), val = tensor([0, 2, 1])]; + tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; + tensor var_11333_cast_fp16 = transpose(perm = var_11332, x = x_253_cast_fp16)[name = string("transpose_78")]; + tensor input_315_cast_fp16 = expand_dims(axes = input_315_axes_0, x = var_11333_cast_fp16)[name = string("input_315_cast_fp16")]; + string x_255_pad_type_0 = const()[name = string("x_255_pad_type_0"), val = string("valid")]; + tensor x_255_strides_0 = const()[name = string("x_255_strides_0"), val = tensor([1, 1])]; + tensor x_255_pad_0 = const()[name = string("x_255_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_255_dilations_0 = const()[name = string("x_255_dilations_0"), val = tensor([1, 1])]; + int32 x_255_groups_0 = const()[name = string("x_255_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678962688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(684934720))))[name = string("model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_255_cast_fp16 = conv(dilations = x_255_dilations_0, groups = x_255_groups_0, pad = x_255_pad_0, pad_type = x_255_pad_type_0, strides = x_255_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("x_255_cast_fp16")]; + string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; + tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; + tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; + int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685155968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(691128000))))[name = string("model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_31_cast_fp16 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("b_31_cast_fp16")]; + string var_11358_mode_0 = const()[name = string("op_11358_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_11358_cast_fp16 = gelu(mode = var_11358_mode_0, x = x_255_cast_fp16)[name = string("op_11358_cast_fp16")]; + tensor input_317_cast_fp16 = mul(x = var_11358_cast_fp16, y = b_31_cast_fp16)[name = string("input_317_cast_fp16")]; + string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; + tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; + tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; + int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(691349248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697321280))))[name = string("model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_31_cast_fp16 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_317_cast_fp16)[name = string("e_31_cast_fp16")]; + tensor var_11366_axes_0 = const()[name = string("op_11366_axes_0"), val = tensor([2])]; + tensor var_11366_cast_fp16 = squeeze(axes = var_11366_axes_0, x = e_31_cast_fp16)[name = string("op_11366_cast_fp16")]; + tensor var_11367 = const()[name = string("op_11367"), val = tensor([0, 2, 1])]; + int32 var_11378 = const()[name = string("op_11378"), val = int32(-1)]; + fp16 const_604_promoted_to_fp16 = const()[name = string("const_604_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_257_cast_fp16 = transpose(perm = var_11367, x = var_11366_cast_fp16)[name = string("transpose_77")]; + tensor var_11380_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = const_604_promoted_to_fp16)[name = string("op_11380_cast_fp16")]; + bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; + tensor input_319_cast_fp16 = concat(axis = var_11378, interleave = input_319_interleave_0, values = (hidden_states_257_cast_fp16, var_11380_cast_fp16))[name = string("input_319_cast_fp16")]; + tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; + fp16 var_11375_to_fp16 = const()[name = string("op_11375_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_11375_to_fp16, x = input_319_cast_fp16)[name = string("normed_381_cast_fp16")]; + tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; + tensor var_11394_to_fp16 = const()[name = string("op_11394_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697358208)))]; + tensor hidden_states_259_cast_fp16 = mul(x = normed_383_cast_fp16, y = var_11394_to_fp16)[name = string("hidden_states_259_cast_fp16")]; + tensor hidden_states_261_cast_fp16 = add(x = hidden_states_255_cast_fp16, y = hidden_states_259_cast_fp16)[name = string("hidden_states_261_cast_fp16")]; + int32 var_11445 = const()[name = string("op_11445"), val = int32(-1)]; + fp16 const_608_promoted_to_fp16 = const()[name = string("const_608_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11447_cast_fp16 = mul(x = hidden_states_261_cast_fp16, y = const_608_promoted_to_fp16)[name = string("op_11447_cast_fp16")]; + bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; + tensor input_321_cast_fp16 = concat(axis = var_11445, interleave = input_321_interleave_0, values = (hidden_states_261_cast_fp16, var_11447_cast_fp16))[name = string("input_321_cast_fp16")]; + tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; + fp16 var_11442_to_fp16 = const()[name = string("op_11442_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_11442_to_fp16, x = input_321_cast_fp16)[name = string("normed_385_cast_fp16")]; + tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; + tensor var_11461_to_fp16 = const()[name = string("op_11461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697360576)))]; + tensor hidden_states_263_cast_fp16 = mul(x = normed_387_cast_fp16, y = var_11461_to_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor var_11466 = const()[name = string("op_11466"), val = tensor([0, 2, 1])]; + tensor var_11469_axes_0 = const()[name = string("op_11469_axes_0"), val = tensor([2])]; + tensor var_11467_cast_fp16 = transpose(perm = var_11466, x = hidden_states_263_cast_fp16)[name = string("transpose_76")]; + tensor var_11469_cast_fp16 = expand_dims(axes = var_11469_axes_0, x = var_11467_cast_fp16)[name = string("op_11469_cast_fp16")]; + string var_11485_pad_type_0 = const()[name = string("op_11485_pad_type_0"), val = string("valid")]; + tensor var_11485_strides_0 = const()[name = string("op_11485_strides_0"), val = tensor([1, 1])]; + tensor var_11485_pad_0 = const()[name = string("op_11485_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11485_dilations_0 = const()[name = string("op_11485_dilations_0"), val = tensor([1, 1])]; + int32 var_11485_groups_0 = const()[name = string("op_11485_groups_0"), val = int32(1)]; + tensor var_11485 = conv(dilations = var_11485_dilations_0, groups = var_11485_groups_0, pad = var_11485_pad_0, pad_type = var_11485_pad_type_0, strides = var_11485_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_11469_cast_fp16)[name = string("op_11485")]; + tensor var_11490 = const()[name = string("op_11490"), val = tensor([1, 4, 1, 256])]; + tensor var_11491 = reshape(shape = var_11490, x = var_11485)[name = string("op_11491")]; + string var_11507_pad_type_0 = const()[name = string("op_11507_pad_type_0"), val = string("valid")]; + tensor var_11507_strides_0 = const()[name = string("op_11507_strides_0"), val = tensor([1, 1])]; + tensor var_11507_pad_0 = const()[name = string("op_11507_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11507_dilations_0 = const()[name = string("op_11507_dilations_0"), val = tensor([1, 1])]; + int32 var_11507_groups_0 = const()[name = string("op_11507_groups_0"), val = int32(1)]; + tensor var_11507 = conv(dilations = var_11507_dilations_0, groups = var_11507_groups_0, pad = var_11507_pad_0, pad_type = var_11507_pad_type_0, strides = var_11507_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_11469_cast_fp16)[name = string("op_11507")]; + tensor var_11512 = const()[name = string("op_11512"), val = tensor([1, 1, 1, 256])]; + tensor var_11513 = reshape(shape = var_11512, x = var_11507)[name = string("op_11513")]; + string var_11529_pad_type_0 = const()[name = string("op_11529_pad_type_0"), val = string("valid")]; + tensor var_11529_strides_0 = const()[name = string("op_11529_strides_0"), val = tensor([1, 1])]; + tensor var_11529_pad_0 = const()[name = string("op_11529_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11529_dilations_0 = const()[name = string("op_11529_dilations_0"), val = tensor([1, 1])]; + int32 var_11529_groups_0 = const()[name = string("op_11529_groups_0"), val = int32(1)]; + tensor var_11529 = conv(dilations = var_11529_dilations_0, groups = var_11529_groups_0, pad = var_11529_pad_0, pad_type = var_11529_pad_type_0, strides = var_11529_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_11469_cast_fp16)[name = string("op_11529")]; + tensor var_11534 = const()[name = string("op_11534"), val = tensor([1, 1, 1, 256])]; + tensor var_11535 = reshape(shape = var_11534, x = var_11529)[name = string("op_11535")]; + int32 var_11550 = const()[name = string("op_11550"), val = int32(-1)]; + fp16 const_612_promoted = const()[name = string("const_612_promoted"), val = fp16(-0x1p+0)]; + tensor var_11552 = mul(x = var_11491, y = const_612_promoted)[name = string("op_11552")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325 = concat(axis = var_11550, interleave = input_325_interleave_0, values = (var_11491, var_11552))[name = string("input_325")]; + tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; + fp16 var_11547_to_fp16 = const()[name = string("op_11547_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_11547_to_fp16, x = input_325)[name = string("normed_389_cast_fp16")]; + tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; + tensor var_11566_to_fp16 = const()[name = string("op_11566_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697362944)))]; + tensor q_33_cast_fp16 = mul(x = normed_391, y = var_11566_to_fp16)[name = string("q_33_cast_fp16")]; + int32 var_11577 = const()[name = string("op_11577"), val = int32(-1)]; + fp16 const_616_promoted = const()[name = string("const_616_promoted"), val = fp16(-0x1p+0)]; + tensor var_11579 = mul(x = var_11513, y = const_616_promoted)[name = string("op_11579")]; + bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; + tensor input_327 = concat(axis = var_11577, interleave = input_327_interleave_0, values = (var_11513, var_11579))[name = string("input_327")]; + tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; + fp16 var_11574_to_fp16 = const()[name = string("op_11574_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_11574_to_fp16, x = input_327)[name = string("normed_393_cast_fp16")]; + tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; + tensor var_11593_to_fp16 = const()[name = string("op_11593_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697363520)))]; + tensor k_33_cast_fp16 = mul(x = normed_395, y = var_11593_to_fp16)[name = string("k_33_cast_fp16")]; + tensor var_11595_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11595_cast_fp16")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; + fp16 const_622_promoted_to_fp16 = const()[name = string("const_622_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11616_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_622_promoted_to_fp16)[name = string("op_11616_cast_fp16")]; + int32 var_11618 = const()[name = string("op_11618"), val = int32(-1)]; + bool var_11619_interleave_0 = const()[name = string("op_11619_interleave_0"), val = bool(false)]; + tensor var_11619_cast_fp16 = concat(axis = var_11618, interleave = var_11619_interleave_0, values = (var_11616_cast_fp16, x1_65_cast_fp16))[name = string("op_11619_cast_fp16")]; + tensor var_11620_cast_fp16 = mul(x = var_11619_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11620_cast_fp16")]; + tensor query_states_65_cast_fp16 = add(x = var_11595_cast_fp16, y = var_11620_cast_fp16)[name = string("query_states_65_cast_fp16")]; + tensor var_11623_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11623_cast_fp16")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; + fp16 const_625_promoted_to_fp16 = const()[name = string("const_625_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11644_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_625_promoted_to_fp16)[name = string("op_11644_cast_fp16")]; + int32 var_11646 = const()[name = string("op_11646"), val = int32(-1)]; + bool var_11647_interleave_0 = const()[name = string("op_11647_interleave_0"), val = bool(false)]; + tensor var_11647_cast_fp16 = concat(axis = var_11646, interleave = var_11647_interleave_0, values = (var_11644_cast_fp16, x1_67_cast_fp16))[name = string("op_11647_cast_fp16")]; + tensor var_11648_cast_fp16 = mul(x = var_11647_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11648_cast_fp16")]; + tensor key_states_65_cast_fp16 = add(x = var_11623_cast_fp16, y = var_11648_cast_fp16)[name = string("key_states_65_cast_fp16")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([14])]; + tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; + tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; + tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([15])]; + int32 concat_130_axis_0 = const()[name = string("concat_130_axis_0"), val = int32(0)]; + bool concat_130_interleave_0 = const()[name = string("concat_130_interleave_0"), val = bool(false)]; + tensor concat_130 = concat(axis = concat_130_axis_0, interleave = concat_130_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_130")]; + tensor concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = tensor([0])]; + tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; + int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; + bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; + tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_196, concat_131_values1_0, var_1955, concat_131_values3_0))[name = string("concat_131")]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_130, begin_mask = model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0, end = concat_131, end_mask = model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_29_stride_0, update = key_states_65_cast_fp16, x = coreml_update_state_83)[name = string("model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_32")]; + tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([36])]; + tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; + tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; + tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([37])]; + int32 concat_134_axis_0 = const()[name = string("concat_134_axis_0"), val = int32(0)]; + bool concat_134_interleave_0 = const()[name = string("concat_134_interleave_0"), val = bool(false)]; + tensor concat_134 = concat(axis = concat_134_axis_0, interleave = concat_134_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_134")]; + tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; + tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; + int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; + bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; + tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (expand_dims_202, concat_135_values1_0, var_1955, concat_135_values3_0))[name = string("concat_135")]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_134, begin_mask = model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0, end = concat_135, end_mask = model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_30_stride_0, update = var_11535, x = coreml_update_state_84)[name = string("model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_33")]; + tensor var_11703_begin_0 = const()[name = string("op_11703_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor var_11703_end_0 = const()[name = string("op_11703_end_0"), val = tensor([15, 1, 512, 256])]; + tensor var_11703_end_mask_0 = const()[name = string("op_11703_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11703_cast_fp16 = slice_by_index(begin = var_11703_begin_0, end = var_11703_end_0, end_mask = var_11703_end_mask_0, x = coreml_update_state_85)[name = string("op_11703_cast_fp16")]; + tensor var_11710_begin_0 = const()[name = string("op_11710_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor var_11710_end_0 = const()[name = string("op_11710_end_0"), val = tensor([37, 1, 512, 256])]; + tensor var_11710_end_mask_0 = const()[name = string("op_11710_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11710_cast_fp16 = slice_by_index(begin = var_11710_begin_0, end = var_11710_end_0, end_mask = var_11710_end_mask_0, x = coreml_update_state_85)[name = string("op_11710_cast_fp16")]; + tensor var_11747 = const()[name = string("op_11747"), val = tensor([1, 4, 1, 1])]; + tensor x_261_cast_fp16 = tile(reps = var_11747, x = var_11703_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_11767 = const()[name = string("op_11767"), val = tensor([1, 4, 1, 1])]; + tensor x_267_cast_fp16 = tile(reps = var_11767, x = var_11710_cast_fp16)[name = string("x_267_cast_fp16")]; + bool var_11794_transpose_x_1 = const()[name = string("op_11794_transpose_x_1"), val = bool(false)]; + bool var_11794_transpose_y_1 = const()[name = string("op_11794_transpose_y_1"), val = bool(true)]; + tensor var_11794 = matmul(transpose_x = var_11794_transpose_x_1, transpose_y = var_11794_transpose_y_1, x = query_states_65_cast_fp16, y = x_261_cast_fp16)[name = string("op_11794")]; + fp16 var_11795_to_fp16 = const()[name = string("op_11795_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_97_cast_fp16 = mul(x = var_11794, y = var_11795_to_fp16)[name = string("attn_weights_97_cast_fp16")]; + tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = var_2129)[name = string("attn_weights_99_cast_fp16")]; + int32 var_11830 = const()[name = string("op_11830"), val = int32(-1)]; + tensor attn_weights_101_cast_fp16 = softmax(axis = var_11830, x = attn_weights_99_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; + bool attn_output_161_transpose_x_0 = const()[name = string("attn_output_161_transpose_x_0"), val = bool(false)]; + bool attn_output_161_transpose_y_0 = const()[name = string("attn_output_161_transpose_y_0"), val = bool(false)]; + tensor attn_output_161_cast_fp16 = matmul(transpose_x = attn_output_161_transpose_x_0, transpose_y = attn_output_161_transpose_y_0, x = attn_weights_101_cast_fp16, y = x_267_cast_fp16)[name = string("attn_output_161_cast_fp16")]; + tensor var_11841_perm_0 = const()[name = string("op_11841_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11845 = const()[name = string("op_11845"), val = tensor([1, 1, 1024])]; + tensor var_11841_cast_fp16 = transpose(perm = var_11841_perm_0, x = attn_output_161_cast_fp16)[name = string("transpose_75")]; + tensor attn_output_165_cast_fp16 = reshape(shape = var_11845, x = var_11841_cast_fp16)[name = string("attn_output_165_cast_fp16")]; + tensor var_11850 = const()[name = string("op_11850"), val = tensor([0, 2, 1])]; + string var_11866_pad_type_0 = const()[name = string("op_11866_pad_type_0"), val = string("valid")]; + int32 var_11866_groups_0 = const()[name = string("op_11866_groups_0"), val = int32(1)]; + tensor var_11866_strides_0 = const()[name = string("op_11866_strides_0"), val = tensor([1])]; + tensor var_11866_pad_0 = const()[name = string("op_11866_pad_0"), val = tensor([0, 0])]; + tensor var_11866_dilations_0 = const()[name = string("op_11866_dilations_0"), val = tensor([1])]; + tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697364096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698248896))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11851_cast_fp16 = transpose(perm = var_11850, x = attn_output_165_cast_fp16)[name = string("transpose_74")]; + tensor var_11866_cast_fp16 = conv(dilations = var_11866_dilations_0, groups = var_11866_groups_0, pad = var_11866_pad_0, pad_type = var_11866_pad_type_0, strides = var_11866_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_11851_cast_fp16)[name = string("op_11866_cast_fp16")]; + tensor var_11870 = const()[name = string("op_11870"), val = tensor([0, 2, 1])]; + int32 var_11881 = const()[name = string("op_11881"), val = int32(-1)]; + fp16 const_634_promoted_to_fp16 = const()[name = string("const_634_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_269_cast_fp16 = transpose(perm = var_11870, x = var_11866_cast_fp16)[name = string("transpose_73")]; + tensor var_11883_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_634_promoted_to_fp16)[name = string("op_11883_cast_fp16")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331_cast_fp16 = concat(axis = var_11881, interleave = input_331_interleave_0, values = (hidden_states_269_cast_fp16, var_11883_cast_fp16))[name = string("input_331_cast_fp16")]; + tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; + fp16 var_11878_to_fp16 = const()[name = string("op_11878_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_11878_to_fp16, x = input_331_cast_fp16)[name = string("normed_397_cast_fp16")]; + tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; + tensor var_11897_to_fp16 = const()[name = string("op_11897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698285824)))]; + tensor attn_output_169_cast_fp16 = mul(x = normed_399_cast_fp16, y = var_11897_to_fp16)[name = string("attn_output_169_cast_fp16")]; + tensor hidden_states_271_cast_fp16 = add(x = hidden_states_261_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_271_cast_fp16")]; + int32 var_11910 = const()[name = string("op_11910"), val = int32(-1)]; + fp16 const_638_promoted_to_fp16 = const()[name = string("const_638_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11912_cast_fp16 = mul(x = hidden_states_271_cast_fp16, y = const_638_promoted_to_fp16)[name = string("op_11912_cast_fp16")]; + bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; + tensor input_333_cast_fp16 = concat(axis = var_11910, interleave = input_333_interleave_0, values = (hidden_states_271_cast_fp16, var_11912_cast_fp16))[name = string("input_333_cast_fp16")]; + tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; + fp16 var_11907_to_fp16 = const()[name = string("op_11907_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_11907_to_fp16, x = input_333_cast_fp16)[name = string("normed_401_cast_fp16")]; + tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; + tensor var_11926_to_fp16 = const()[name = string("op_11926_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698288192)))]; + tensor x_269_cast_fp16 = mul(x = normed_403_cast_fp16, y = var_11926_to_fp16)[name = string("x_269_cast_fp16")]; + tensor var_11938 = const()[name = string("op_11938"), val = tensor([0, 2, 1])]; + tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; + tensor var_11939_cast_fp16 = transpose(perm = var_11938, x = x_269_cast_fp16)[name = string("transpose_72")]; + tensor input_335_cast_fp16 = expand_dims(axes = input_335_axes_0, x = var_11939_cast_fp16)[name = string("input_335_cast_fp16")]; + string x_271_pad_type_0 = const()[name = string("x_271_pad_type_0"), val = string("valid")]; + tensor x_271_strides_0 = const()[name = string("x_271_strides_0"), val = tensor([1, 1])]; + tensor x_271_pad_0 = const()[name = string("x_271_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_271_dilations_0 = const()[name = string("x_271_dilations_0"), val = tensor([1, 1])]; + int32 x_271_groups_0 = const()[name = string("x_271_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698290560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704262592))))[name = string("model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_271_cast_fp16 = conv(dilations = x_271_dilations_0, groups = x_271_groups_0, pad = x_271_pad_0, pad_type = x_271_pad_type_0, strides = x_271_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("x_271_cast_fp16")]; + string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; + tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; + tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; + int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704483840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710455872))))[name = string("model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_33_cast_fp16 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("b_33_cast_fp16")]; + string var_11964_mode_0 = const()[name = string("op_11964_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_11964_cast_fp16 = gelu(mode = var_11964_mode_0, x = x_271_cast_fp16)[name = string("op_11964_cast_fp16")]; + tensor input_337_cast_fp16 = mul(x = var_11964_cast_fp16, y = b_33_cast_fp16)[name = string("input_337_cast_fp16")]; + string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; + tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; + tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; + int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710677120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716649152))))[name = string("model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_33_cast_fp16 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_337_cast_fp16)[name = string("e_33_cast_fp16")]; + tensor var_11972_axes_0 = const()[name = string("op_11972_axes_0"), val = tensor([2])]; + tensor var_11972_cast_fp16 = squeeze(axes = var_11972_axes_0, x = e_33_cast_fp16)[name = string("op_11972_cast_fp16")]; + tensor var_11973 = const()[name = string("op_11973"), val = tensor([0, 2, 1])]; + int32 var_11984 = const()[name = string("op_11984"), val = int32(-1)]; + fp16 const_642_promoted_to_fp16 = const()[name = string("const_642_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_273_cast_fp16 = transpose(perm = var_11973, x = var_11972_cast_fp16)[name = string("transpose_71")]; + tensor var_11986_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = const_642_promoted_to_fp16)[name = string("op_11986_cast_fp16")]; + bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; + tensor input_339_cast_fp16 = concat(axis = var_11984, interleave = input_339_interleave_0, values = (hidden_states_273_cast_fp16, var_11986_cast_fp16))[name = string("input_339_cast_fp16")]; + tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; + fp16 var_11981_to_fp16 = const()[name = string("op_11981_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_11981_to_fp16, x = input_339_cast_fp16)[name = string("normed_405_cast_fp16")]; + tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_407_cast_fp16 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407_cast_fp16")]; + tensor var_12000_to_fp16 = const()[name = string("op_12000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716686080)))]; + tensor hidden_states_275_cast_fp16 = mul(x = normed_407_cast_fp16, y = var_12000_to_fp16)[name = string("hidden_states_275_cast_fp16")]; + tensor hidden_states_277_cast_fp16 = add(x = hidden_states_271_cast_fp16, y = hidden_states_275_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; + int32 var_12051 = const()[name = string("op_12051"), val = int32(-1)]; + fp16 const_646_promoted_to_fp16 = const()[name = string("const_646_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12053_cast_fp16 = mul(x = hidden_states_277_cast_fp16, y = const_646_promoted_to_fp16)[name = string("op_12053_cast_fp16")]; + bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; + tensor input_341_cast_fp16 = concat(axis = var_12051, interleave = input_341_interleave_0, values = (hidden_states_277_cast_fp16, var_12053_cast_fp16))[name = string("input_341_cast_fp16")]; + tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; + fp16 var_12048_to_fp16 = const()[name = string("op_12048_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_12048_to_fp16, x = input_341_cast_fp16)[name = string("normed_409_cast_fp16")]; + tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_411_cast_fp16 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411_cast_fp16")]; + tensor var_12067_to_fp16 = const()[name = string("op_12067_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716688448)))]; + tensor hidden_states_279_cast_fp16 = mul(x = normed_411_cast_fp16, y = var_12067_to_fp16)[name = string("hidden_states_279_cast_fp16")]; + tensor var_12072 = const()[name = string("op_12072"), val = tensor([0, 2, 1])]; + tensor var_12075_axes_0 = const()[name = string("op_12075_axes_0"), val = tensor([2])]; + tensor var_12073_cast_fp16 = transpose(perm = var_12072, x = hidden_states_279_cast_fp16)[name = string("transpose_70")]; + tensor var_12075_cast_fp16 = expand_dims(axes = var_12075_axes_0, x = var_12073_cast_fp16)[name = string("op_12075_cast_fp16")]; + string var_12091_pad_type_0 = const()[name = string("op_12091_pad_type_0"), val = string("valid")]; + tensor var_12091_strides_0 = const()[name = string("op_12091_strides_0"), val = tensor([1, 1])]; + tensor var_12091_pad_0 = const()[name = string("op_12091_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12091_dilations_0 = const()[name = string("op_12091_dilations_0"), val = tensor([1, 1])]; + int32 var_12091_groups_0 = const()[name = string("op_12091_groups_0"), val = int32(1)]; + tensor var_12091 = conv(dilations = var_12091_dilations_0, groups = var_12091_groups_0, pad = var_12091_pad_0, pad_type = var_12091_pad_type_0, strides = var_12091_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_12075_cast_fp16)[name = string("op_12091")]; + tensor var_12096 = const()[name = string("op_12096"), val = tensor([1, 4, 1, 256])]; + tensor var_12097 = reshape(shape = var_12096, x = var_12091)[name = string("op_12097")]; + string var_12113_pad_type_0 = const()[name = string("op_12113_pad_type_0"), val = string("valid")]; + tensor var_12113_strides_0 = const()[name = string("op_12113_strides_0"), val = tensor([1, 1])]; + tensor var_12113_pad_0 = const()[name = string("op_12113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12113_dilations_0 = const()[name = string("op_12113_dilations_0"), val = tensor([1, 1])]; + int32 var_12113_groups_0 = const()[name = string("op_12113_groups_0"), val = int32(1)]; + tensor var_12113 = conv(dilations = var_12113_dilations_0, groups = var_12113_groups_0, pad = var_12113_pad_0, pad_type = var_12113_pad_type_0, strides = var_12113_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_12075_cast_fp16)[name = string("op_12113")]; + tensor var_12118 = const()[name = string("op_12118"), val = tensor([1, 1, 1, 256])]; + tensor var_12119 = reshape(shape = var_12118, x = var_12113)[name = string("op_12119")]; + string var_12135_pad_type_0 = const()[name = string("op_12135_pad_type_0"), val = string("valid")]; + tensor var_12135_strides_0 = const()[name = string("op_12135_strides_0"), val = tensor([1, 1])]; + tensor var_12135_pad_0 = const()[name = string("op_12135_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12135_dilations_0 = const()[name = string("op_12135_dilations_0"), val = tensor([1, 1])]; + int32 var_12135_groups_0 = const()[name = string("op_12135_groups_0"), val = int32(1)]; + tensor var_12135 = conv(dilations = var_12135_dilations_0, groups = var_12135_groups_0, pad = var_12135_pad_0, pad_type = var_12135_pad_type_0, strides = var_12135_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_12075_cast_fp16)[name = string("op_12135")]; + tensor var_12140 = const()[name = string("op_12140"), val = tensor([1, 1, 1, 256])]; + tensor var_12141 = reshape(shape = var_12140, x = var_12135)[name = string("op_12141")]; + int32 var_12156 = const()[name = string("op_12156"), val = int32(-1)]; + fp16 const_650_promoted = const()[name = string("const_650_promoted"), val = fp16(-0x1p+0)]; + tensor var_12158 = mul(x = var_12097, y = const_650_promoted)[name = string("op_12158")]; + bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; + tensor input_345 = concat(axis = var_12156, interleave = input_345_interleave_0, values = (var_12097, var_12158))[name = string("input_345")]; + tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; + fp16 var_12153_to_fp16 = const()[name = string("op_12153_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_12153_to_fp16, x = input_345)[name = string("normed_413_cast_fp16")]; + tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_415 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415")]; + tensor var_12172_to_fp16 = const()[name = string("op_12172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716690816)))]; + tensor q_35_cast_fp16 = mul(x = normed_415, y = var_12172_to_fp16)[name = string("q_35_cast_fp16")]; + int32 var_12183 = const()[name = string("op_12183"), val = int32(-1)]; + fp16 const_654_promoted = const()[name = string("const_654_promoted"), val = fp16(-0x1p+0)]; + tensor var_12185 = mul(x = var_12119, y = const_654_promoted)[name = string("op_12185")]; + bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; + tensor input_347 = concat(axis = var_12183, interleave = input_347_interleave_0, values = (var_12119, var_12185))[name = string("input_347")]; + tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; + fp16 var_12180_to_fp16 = const()[name = string("op_12180_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_12180_to_fp16, x = input_347)[name = string("normed_417_cast_fp16")]; + tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_419 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419")]; + tensor var_12199_to_fp16 = const()[name = string("op_12199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716691392)))]; + tensor k_35_cast_fp16 = mul(x = normed_419, y = var_12199_to_fp16)[name = string("k_35_cast_fp16")]; + tensor var_12201_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_21_cast_fp16)[name = string("op_12201_cast_fp16")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; + fp16 const_660_promoted_to_fp16 = const()[name = string("const_660_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12222_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_660_promoted_to_fp16)[name = string("op_12222_cast_fp16")]; + int32 var_12224 = const()[name = string("op_12224"), val = int32(-1)]; + bool var_12225_interleave_0 = const()[name = string("op_12225_interleave_0"), val = bool(false)]; + tensor var_12225_cast_fp16 = concat(axis = var_12224, interleave = var_12225_interleave_0, values = (var_12222_cast_fp16, x1_69_cast_fp16))[name = string("op_12225_cast_fp16")]; + tensor var_12226_cast_fp16 = mul(x = var_12225_cast_fp16, y = sin_21_cast_fp16)[name = string("op_12226_cast_fp16")]; + tensor query_states_69_cast_fp16 = add(x = var_12201_cast_fp16, y = var_12226_cast_fp16)[name = string("query_states_69_cast_fp16")]; + tensor var_12229_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_21_cast_fp16)[name = string("op_12229_cast_fp16")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; + fp16 const_663_promoted_to_fp16 = const()[name = string("const_663_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12250_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_663_promoted_to_fp16)[name = string("op_12250_cast_fp16")]; + int32 var_12252 = const()[name = string("op_12252"), val = int32(-1)]; + bool var_12253_interleave_0 = const()[name = string("op_12253_interleave_0"), val = bool(false)]; + tensor var_12253_cast_fp16 = concat(axis = var_12252, interleave = var_12253_interleave_0, values = (var_12250_cast_fp16, x1_71_cast_fp16))[name = string("op_12253_cast_fp16")]; + tensor var_12254_cast_fp16 = mul(x = var_12253_cast_fp16, y = sin_21_cast_fp16)[name = string("op_12254_cast_fp16")]; + tensor key_states_69_cast_fp16 = add(x = var_12229_cast_fp16, y = var_12254_cast_fp16)[name = string("key_states_69_cast_fp16")]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_5_stride_0, update = key_states_69_cast_fp16, x = coreml_update_state_75)[name = string("model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_34")]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_6_stride_0, update = var_12141, x = coreml_update_state_86)[name = string("model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_35")]; + tensor var_12309_begin_0 = const()[name = string("op_12309_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_12309_end_0 = const()[name = string("op_12309_end_0"), val = tensor([3, 1, 4096, 256])]; + tensor var_12309_end_mask_0 = const()[name = string("op_12309_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12309_cast_fp16 = slice_by_index(begin = var_12309_begin_0, end = var_12309_end_0, end_mask = var_12309_end_mask_0, x = coreml_update_state_87)[name = string("op_12309_cast_fp16")]; + tensor var_12316_begin_0 = const()[name = string("op_12316_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_12316_end_0 = const()[name = string("op_12316_end_0"), val = tensor([7, 1, 4096, 256])]; + tensor var_12316_end_mask_0 = const()[name = string("op_12316_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12316_cast_fp16 = slice_by_index(begin = var_12316_begin_0, end = var_12316_end_0, end_mask = var_12316_end_mask_0, x = coreml_update_state_87)[name = string("op_12316_cast_fp16")]; + tensor var_12353 = const()[name = string("op_12353"), val = tensor([1, 4, 1, 1])]; + tensor x_277_cast_fp16 = tile(reps = var_12353, x = var_12309_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_12373 = const()[name = string("op_12373"), val = tensor([1, 4, 1, 1])]; + tensor x_283_cast_fp16 = tile(reps = var_12373, x = var_12316_cast_fp16)[name = string("x_283_cast_fp16")]; + bool var_12400_transpose_x_1 = const()[name = string("op_12400_transpose_x_1"), val = bool(false)]; + bool var_12400_transpose_y_1 = const()[name = string("op_12400_transpose_y_1"), val = bool(true)]; + tensor var_12400 = matmul(transpose_x = var_12400_transpose_x_1, transpose_y = var_12400_transpose_y_1, x = query_states_69_cast_fp16, y = x_277_cast_fp16)[name = string("op_12400")]; + fp16 var_12401_to_fp16 = const()[name = string("op_12401_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_103_cast_fp16 = mul(x = var_12400, y = var_12401_to_fp16)[name = string("attn_weights_103_cast_fp16")]; + tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; + int32 var_12436 = const()[name = string("op_12436"), val = int32(-1)]; + tensor attn_weights_107_cast_fp16 = softmax(axis = var_12436, x = attn_weights_105_cast_fp16)[name = string("attn_weights_107_cast_fp16")]; + bool attn_output_171_transpose_x_0 = const()[name = string("attn_output_171_transpose_x_0"), val = bool(false)]; + bool attn_output_171_transpose_y_0 = const()[name = string("attn_output_171_transpose_y_0"), val = bool(false)]; + tensor attn_output_171_cast_fp16 = matmul(transpose_x = attn_output_171_transpose_x_0, transpose_y = attn_output_171_transpose_y_0, x = attn_weights_107_cast_fp16, y = x_283_cast_fp16)[name = string("attn_output_171_cast_fp16")]; + tensor var_12447_perm_0 = const()[name = string("op_12447_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12451 = const()[name = string("op_12451"), val = tensor([1, 1, 1024])]; + tensor var_12447_cast_fp16 = transpose(perm = var_12447_perm_0, x = attn_output_171_cast_fp16)[name = string("transpose_69")]; + tensor attn_output_175_cast_fp16 = reshape(shape = var_12451, x = var_12447_cast_fp16)[name = string("attn_output_175_cast_fp16")]; + tensor var_12456 = const()[name = string("op_12456"), val = tensor([0, 2, 1])]; + string var_12472_pad_type_0 = const()[name = string("op_12472_pad_type_0"), val = string("valid")]; + int32 var_12472_groups_0 = const()[name = string("op_12472_groups_0"), val = int32(1)]; + tensor var_12472_strides_0 = const()[name = string("op_12472_strides_0"), val = tensor([1])]; + tensor var_12472_pad_0 = const()[name = string("op_12472_pad_0"), val = tensor([0, 0])]; + tensor var_12472_dilations_0 = const()[name = string("op_12472_dilations_0"), val = tensor([1])]; + tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716691968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717576768))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12457_cast_fp16 = transpose(perm = var_12456, x = attn_output_175_cast_fp16)[name = string("transpose_68")]; + tensor var_12472_cast_fp16 = conv(dilations = var_12472_dilations_0, groups = var_12472_groups_0, pad = var_12472_pad_0, pad_type = var_12472_pad_type_0, strides = var_12472_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_12457_cast_fp16)[name = string("op_12472_cast_fp16")]; + tensor var_12476 = const()[name = string("op_12476"), val = tensor([0, 2, 1])]; + int32 var_12487 = const()[name = string("op_12487"), val = int32(-1)]; + fp16 const_672_promoted_to_fp16 = const()[name = string("const_672_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_285_cast_fp16 = transpose(perm = var_12476, x = var_12472_cast_fp16)[name = string("transpose_67")]; + tensor var_12489_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = const_672_promoted_to_fp16)[name = string("op_12489_cast_fp16")]; + bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; + tensor input_351_cast_fp16 = concat(axis = var_12487, interleave = input_351_interleave_0, values = (hidden_states_285_cast_fp16, var_12489_cast_fp16))[name = string("input_351_cast_fp16")]; + tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; + fp16 var_12484_to_fp16 = const()[name = string("op_12484_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_12484_to_fp16, x = input_351_cast_fp16)[name = string("normed_421_cast_fp16")]; + tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_423_cast_fp16 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423_cast_fp16")]; + tensor var_12503_to_fp16 = const()[name = string("op_12503_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717613696)))]; + tensor attn_output_179_cast_fp16 = mul(x = normed_423_cast_fp16, y = var_12503_to_fp16)[name = string("attn_output_179_cast_fp16")]; + tensor hidden_states_287_cast_fp16 = add(x = hidden_states_277_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_287_cast_fp16")]; + int32 var_12516 = const()[name = string("op_12516"), val = int32(-1)]; + fp16 const_676_promoted_to_fp16 = const()[name = string("const_676_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12518_cast_fp16 = mul(x = hidden_states_287_cast_fp16, y = const_676_promoted_to_fp16)[name = string("op_12518_cast_fp16")]; + bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; + tensor input_353_cast_fp16 = concat(axis = var_12516, interleave = input_353_interleave_0, values = (hidden_states_287_cast_fp16, var_12518_cast_fp16))[name = string("input_353_cast_fp16")]; + tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; + fp16 var_12513_to_fp16 = const()[name = string("op_12513_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_12513_to_fp16, x = input_353_cast_fp16)[name = string("normed_425_cast_fp16")]; + tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_427_cast_fp16 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427_cast_fp16")]; + tensor var_12532_to_fp16 = const()[name = string("op_12532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717616064)))]; + tensor x_285_cast_fp16 = mul(x = normed_427_cast_fp16, y = var_12532_to_fp16)[name = string("x_285_cast_fp16")]; + tensor var_12544 = const()[name = string("op_12544"), val = tensor([0, 2, 1])]; + tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; + tensor var_12545_cast_fp16 = transpose(perm = var_12544, x = x_285_cast_fp16)[name = string("transpose_66")]; + tensor input_355_cast_fp16 = expand_dims(axes = input_355_axes_0, x = var_12545_cast_fp16)[name = string("input_355_cast_fp16")]; + string x_287_pad_type_0 = const()[name = string("x_287_pad_type_0"), val = string("valid")]; + tensor x_287_strides_0 = const()[name = string("x_287_strides_0"), val = tensor([1, 1])]; + tensor x_287_pad_0 = const()[name = string("x_287_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_287_dilations_0 = const()[name = string("x_287_dilations_0"), val = tensor([1, 1])]; + int32 x_287_groups_0 = const()[name = string("x_287_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717618432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723590464))))[name = string("model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_287_cast_fp16 = conv(dilations = x_287_dilations_0, groups = x_287_groups_0, pad = x_287_pad_0, pad_type = x_287_pad_type_0, strides = x_287_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("x_287_cast_fp16")]; + string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; + tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; + tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; + int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723811712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729783744))))[name = string("model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_35_cast_fp16 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("b_35_cast_fp16")]; + string var_12570_mode_0 = const()[name = string("op_12570_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_12570_cast_fp16 = gelu(mode = var_12570_mode_0, x = x_287_cast_fp16)[name = string("op_12570_cast_fp16")]; + tensor input_357_cast_fp16 = mul(x = var_12570_cast_fp16, y = b_35_cast_fp16)[name = string("input_357_cast_fp16")]; + string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; + tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; + tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; + int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730004992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735977024))))[name = string("model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_35_cast_fp16 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_357_cast_fp16)[name = string("e_35_cast_fp16")]; + tensor var_12578_axes_0 = const()[name = string("op_12578_axes_0"), val = tensor([2])]; + tensor var_12578_cast_fp16 = squeeze(axes = var_12578_axes_0, x = e_35_cast_fp16)[name = string("op_12578_cast_fp16")]; + tensor var_12579 = const()[name = string("op_12579"), val = tensor([0, 2, 1])]; + int32 var_12590 = const()[name = string("op_12590"), val = int32(-1)]; + fp16 const_680_promoted_to_fp16 = const()[name = string("const_680_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_289_cast_fp16 = transpose(perm = var_12579, x = var_12578_cast_fp16)[name = string("transpose_65")]; + tensor var_12592_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = const_680_promoted_to_fp16)[name = string("op_12592_cast_fp16")]; + bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; + tensor input_359_cast_fp16 = concat(axis = var_12590, interleave = input_359_interleave_0, values = (hidden_states_289_cast_fp16, var_12592_cast_fp16))[name = string("input_359_cast_fp16")]; + tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; + fp16 var_12587_to_fp16 = const()[name = string("op_12587_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_12587_to_fp16, x = input_359_cast_fp16)[name = string("normed_429_cast_fp16")]; + tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; + tensor var_12606_to_fp16 = const()[name = string("op_12606_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736013952)))]; + tensor hidden_states_291_cast_fp16 = mul(x = normed_431_cast_fp16, y = var_12606_to_fp16)[name = string("hidden_states_291_cast_fp16")]; + tensor hidden_states_293_cast_fp16 = add(x = hidden_states_287_cast_fp16, y = hidden_states_291_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + int32 var_12657 = const()[name = string("op_12657"), val = int32(-1)]; + fp16 const_684_promoted_to_fp16 = const()[name = string("const_684_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12659_cast_fp16 = mul(x = hidden_states_293_cast_fp16, y = const_684_promoted_to_fp16)[name = string("op_12659_cast_fp16")]; + bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; + tensor input_361_cast_fp16 = concat(axis = var_12657, interleave = input_361_interleave_0, values = (hidden_states_293_cast_fp16, var_12659_cast_fp16))[name = string("input_361_cast_fp16")]; + tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; + fp16 var_12654_to_fp16 = const()[name = string("op_12654_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_12654_to_fp16, x = input_361_cast_fp16)[name = string("normed_433_cast_fp16")]; + tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; + tensor var_12673_to_fp16 = const()[name = string("op_12673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736016320)))]; + tensor hidden_states_295_cast_fp16 = mul(x = normed_435_cast_fp16, y = var_12673_to_fp16)[name = string("hidden_states_295_cast_fp16")]; + tensor var_12678 = const()[name = string("op_12678"), val = tensor([0, 2, 1])]; + tensor var_12681_axes_0 = const()[name = string("op_12681_axes_0"), val = tensor([2])]; + tensor var_12679_cast_fp16 = transpose(perm = var_12678, x = hidden_states_295_cast_fp16)[name = string("transpose_64")]; + tensor var_12681_cast_fp16 = expand_dims(axes = var_12681_axes_0, x = var_12679_cast_fp16)[name = string("op_12681_cast_fp16")]; + string var_12697_pad_type_0 = const()[name = string("op_12697_pad_type_0"), val = string("valid")]; + tensor var_12697_strides_0 = const()[name = string("op_12697_strides_0"), val = tensor([1, 1])]; + tensor var_12697_pad_0 = const()[name = string("op_12697_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12697_dilations_0 = const()[name = string("op_12697_dilations_0"), val = tensor([1, 1])]; + int32 var_12697_groups_0 = const()[name = string("op_12697_groups_0"), val = int32(1)]; + tensor var_12697 = conv(dilations = var_12697_dilations_0, groups = var_12697_groups_0, pad = var_12697_pad_0, pad_type = var_12697_pad_type_0, strides = var_12697_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_12681_cast_fp16)[name = string("op_12697")]; + tensor var_12702 = const()[name = string("op_12702"), val = tensor([1, 4, 1, 256])]; + tensor var_12703 = reshape(shape = var_12702, x = var_12697)[name = string("op_12703")]; + string var_12719_pad_type_0 = const()[name = string("op_12719_pad_type_0"), val = string("valid")]; + tensor var_12719_strides_0 = const()[name = string("op_12719_strides_0"), val = tensor([1, 1])]; + tensor var_12719_pad_0 = const()[name = string("op_12719_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12719_dilations_0 = const()[name = string("op_12719_dilations_0"), val = tensor([1, 1])]; + int32 var_12719_groups_0 = const()[name = string("op_12719_groups_0"), val = int32(1)]; + tensor var_12719 = conv(dilations = var_12719_dilations_0, groups = var_12719_groups_0, pad = var_12719_pad_0, pad_type = var_12719_pad_type_0, strides = var_12719_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_12681_cast_fp16)[name = string("op_12719")]; + tensor var_12724 = const()[name = string("op_12724"), val = tensor([1, 1, 1, 256])]; + tensor var_12725 = reshape(shape = var_12724, x = var_12719)[name = string("op_12725")]; + string var_12741_pad_type_0 = const()[name = string("op_12741_pad_type_0"), val = string("valid")]; + tensor var_12741_strides_0 = const()[name = string("op_12741_strides_0"), val = tensor([1, 1])]; + tensor var_12741_pad_0 = const()[name = string("op_12741_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12741_dilations_0 = const()[name = string("op_12741_dilations_0"), val = tensor([1, 1])]; + int32 var_12741_groups_0 = const()[name = string("op_12741_groups_0"), val = int32(1)]; + tensor var_12741 = conv(dilations = var_12741_dilations_0, groups = var_12741_groups_0, pad = var_12741_pad_0, pad_type = var_12741_pad_type_0, strides = var_12741_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_12681_cast_fp16)[name = string("op_12741")]; + tensor var_12746 = const()[name = string("op_12746"), val = tensor([1, 1, 1, 256])]; + tensor var_12747 = reshape(shape = var_12746, x = var_12741)[name = string("op_12747")]; + int32 var_12762 = const()[name = string("op_12762"), val = int32(-1)]; + fp16 const_688_promoted = const()[name = string("const_688_promoted"), val = fp16(-0x1p+0)]; + tensor var_12764 = mul(x = var_12703, y = const_688_promoted)[name = string("op_12764")]; + bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; + tensor input_365 = concat(axis = var_12762, interleave = input_365_interleave_0, values = (var_12703, var_12764))[name = string("input_365")]; + tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; + fp16 var_12759_to_fp16 = const()[name = string("op_12759_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_12759_to_fp16, x = input_365)[name = string("normed_437_cast_fp16")]; + tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; + tensor var_12778_to_fp16 = const()[name = string("op_12778_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736018688)))]; + tensor q_37_cast_fp16 = mul(x = normed_439, y = var_12778_to_fp16)[name = string("q_37_cast_fp16")]; + int32 var_12789 = const()[name = string("op_12789"), val = int32(-1)]; + fp16 const_692_promoted = const()[name = string("const_692_promoted"), val = fp16(-0x1p+0)]; + tensor var_12791 = mul(x = var_12725, y = const_692_promoted)[name = string("op_12791")]; + bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; + tensor input_367 = concat(axis = var_12789, interleave = input_367_interleave_0, values = (var_12725, var_12791))[name = string("input_367")]; + tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; + fp16 var_12786_to_fp16 = const()[name = string("op_12786_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_12786_to_fp16, x = input_367)[name = string("normed_441_cast_fp16")]; + tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; + tensor var_12805_to_fp16 = const()[name = string("op_12805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736019264)))]; + tensor k_37_cast_fp16 = mul(x = normed_443, y = var_12805_to_fp16)[name = string("k_37_cast_fp16")]; + tensor var_12807_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_12807_cast_fp16")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; + fp16 const_698_promoted_to_fp16 = const()[name = string("const_698_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12828_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_698_promoted_to_fp16)[name = string("op_12828_cast_fp16")]; + int32 var_12830 = const()[name = string("op_12830"), val = int32(-1)]; + bool var_12831_interleave_0 = const()[name = string("op_12831_interleave_0"), val = bool(false)]; + tensor var_12831_cast_fp16 = concat(axis = var_12830, interleave = var_12831_interleave_0, values = (var_12828_cast_fp16, x1_73_cast_fp16))[name = string("op_12831_cast_fp16")]; + tensor var_12832_cast_fp16 = mul(x = var_12831_cast_fp16, y = sin_1_cast_fp16)[name = string("op_12832_cast_fp16")]; + tensor query_states_73_cast_fp16 = add(x = var_12807_cast_fp16, y = var_12832_cast_fp16)[name = string("query_states_73_cast_fp16")]; + tensor var_12835_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_12835_cast_fp16")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; + fp16 const_701_promoted_to_fp16 = const()[name = string("const_701_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12856_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_701_promoted_to_fp16)[name = string("op_12856_cast_fp16")]; + int32 var_12858 = const()[name = string("op_12858"), val = int32(-1)]; + bool var_12859_interleave_0 = const()[name = string("op_12859_interleave_0"), val = bool(false)]; + tensor var_12859_cast_fp16 = concat(axis = var_12858, interleave = var_12859_interleave_0, values = (var_12856_cast_fp16, x1_75_cast_fp16))[name = string("op_12859_cast_fp16")]; + tensor var_12860_cast_fp16 = mul(x = var_12859_cast_fp16, y = sin_1_cast_fp16)[name = string("op_12860_cast_fp16")]; + tensor key_states_73_cast_fp16 = add(x = var_12835_cast_fp16, y = var_12860_cast_fp16)[name = string("key_states_73_cast_fp16")]; + tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([15])]; + tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; + tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; + tensor expand_dims_220 = const()[name = string("expand_dims_220"), val = tensor([16])]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_216, expand_dims_217, current_pos, expand_dims_219))[name = string("concat_146")]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_220, concat_147_values1_0, var_1955, concat_147_values3_0))[name = string("concat_147")]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_31_stride_0, update = key_states_73_cast_fp16, x = coreml_update_state_85)[name = string("model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_36_write_state")]; + tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_36")]; + tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([37])]; + tensor expand_dims_223 = const()[name = string("expand_dims_223"), val = tensor([0])]; + tensor expand_dims_225 = const()[name = string("expand_dims_225"), val = tensor([0])]; + tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([38])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_222, expand_dims_223, current_pos, expand_dims_225))[name = string("concat_150")]; + tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; + tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_226, concat_151_values1_0, var_1955, concat_151_values3_0))[name = string("concat_151")]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_32_stride_0, update = var_12747, x = coreml_update_state_88)[name = string("model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_37_write_state")]; + tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_37")]; + tensor var_12915_begin_0 = const()[name = string("op_12915_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor var_12915_end_0 = const()[name = string("op_12915_end_0"), val = tensor([16, 1, 512, 256])]; + tensor var_12915_end_mask_0 = const()[name = string("op_12915_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12915_cast_fp16 = slice_by_index(begin = var_12915_begin_0, end = var_12915_end_0, end_mask = var_12915_end_mask_0, x = coreml_update_state_89)[name = string("op_12915_cast_fp16")]; + tensor var_12922_begin_0 = const()[name = string("op_12922_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor var_12922_end_0 = const()[name = string("op_12922_end_0"), val = tensor([38, 1, 512, 256])]; + tensor var_12922_end_mask_0 = const()[name = string("op_12922_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12922_cast_fp16 = slice_by_index(begin = var_12922_begin_0, end = var_12922_end_0, end_mask = var_12922_end_mask_0, x = coreml_update_state_89)[name = string("op_12922_cast_fp16")]; + tensor var_12959 = const()[name = string("op_12959"), val = tensor([1, 4, 1, 1])]; + tensor x_293_cast_fp16 = tile(reps = var_12959, x = var_12915_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_12979 = const()[name = string("op_12979"), val = tensor([1, 4, 1, 1])]; + tensor x_299_cast_fp16 = tile(reps = var_12979, x = var_12922_cast_fp16)[name = string("x_299_cast_fp16")]; + bool var_13006_transpose_x_1 = const()[name = string("op_13006_transpose_x_1"), val = bool(false)]; + bool var_13006_transpose_y_1 = const()[name = string("op_13006_transpose_y_1"), val = bool(true)]; + tensor var_13006 = matmul(transpose_x = var_13006_transpose_x_1, transpose_y = var_13006_transpose_y_1, x = query_states_73_cast_fp16, y = x_293_cast_fp16)[name = string("op_13006")]; + fp16 var_13007_to_fp16 = const()[name = string("op_13007_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_109_cast_fp16 = mul(x = var_13006, y = var_13007_to_fp16)[name = string("attn_weights_109_cast_fp16")]; + tensor attn_weights_111_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = var_2129)[name = string("attn_weights_111_cast_fp16")]; + int32 var_13042 = const()[name = string("op_13042"), val = int32(-1)]; + tensor attn_weights_113_cast_fp16 = softmax(axis = var_13042, x = attn_weights_111_cast_fp16)[name = string("attn_weights_113_cast_fp16")]; + bool attn_output_181_transpose_x_0 = const()[name = string("attn_output_181_transpose_x_0"), val = bool(false)]; + bool attn_output_181_transpose_y_0 = const()[name = string("attn_output_181_transpose_y_0"), val = bool(false)]; + tensor attn_output_181_cast_fp16 = matmul(transpose_x = attn_output_181_transpose_x_0, transpose_y = attn_output_181_transpose_y_0, x = attn_weights_113_cast_fp16, y = x_299_cast_fp16)[name = string("attn_output_181_cast_fp16")]; + tensor var_13053_perm_0 = const()[name = string("op_13053_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13057 = const()[name = string("op_13057"), val = tensor([1, 1, 1024])]; + tensor var_13053_cast_fp16 = transpose(perm = var_13053_perm_0, x = attn_output_181_cast_fp16)[name = string("transpose_63")]; + tensor attn_output_185_cast_fp16 = reshape(shape = var_13057, x = var_13053_cast_fp16)[name = string("attn_output_185_cast_fp16")]; + tensor var_13062 = const()[name = string("op_13062"), val = tensor([0, 2, 1])]; + string var_13078_pad_type_0 = const()[name = string("op_13078_pad_type_0"), val = string("valid")]; + int32 var_13078_groups_0 = const()[name = string("op_13078_groups_0"), val = int32(1)]; + tensor var_13078_strides_0 = const()[name = string("op_13078_strides_0"), val = tensor([1])]; + tensor var_13078_pad_0 = const()[name = string("op_13078_pad_0"), val = tensor([0, 0])]; + tensor var_13078_dilations_0 = const()[name = string("op_13078_dilations_0"), val = tensor([1])]; + tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736019840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736904640))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13063_cast_fp16 = transpose(perm = var_13062, x = attn_output_185_cast_fp16)[name = string("transpose_62")]; + tensor var_13078_cast_fp16 = conv(dilations = var_13078_dilations_0, groups = var_13078_groups_0, pad = var_13078_pad_0, pad_type = var_13078_pad_type_0, strides = var_13078_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_13063_cast_fp16)[name = string("op_13078_cast_fp16")]; + tensor var_13082 = const()[name = string("op_13082"), val = tensor([0, 2, 1])]; + int32 var_13093 = const()[name = string("op_13093"), val = int32(-1)]; + fp16 const_710_promoted_to_fp16 = const()[name = string("const_710_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_301_cast_fp16 = transpose(perm = var_13082, x = var_13078_cast_fp16)[name = string("transpose_61")]; + tensor var_13095_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = const_710_promoted_to_fp16)[name = string("op_13095_cast_fp16")]; + bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; + tensor input_371_cast_fp16 = concat(axis = var_13093, interleave = input_371_interleave_0, values = (hidden_states_301_cast_fp16, var_13095_cast_fp16))[name = string("input_371_cast_fp16")]; + tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; + fp16 var_13090_to_fp16 = const()[name = string("op_13090_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_13090_to_fp16, x = input_371_cast_fp16)[name = string("normed_445_cast_fp16")]; + tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; + tensor var_13109_to_fp16 = const()[name = string("op_13109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736941568)))]; + tensor attn_output_189_cast_fp16 = mul(x = normed_447_cast_fp16, y = var_13109_to_fp16)[name = string("attn_output_189_cast_fp16")]; + tensor hidden_states_303_cast_fp16 = add(x = hidden_states_293_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; + int32 var_13122 = const()[name = string("op_13122"), val = int32(-1)]; + fp16 const_714_promoted_to_fp16 = const()[name = string("const_714_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13124_cast_fp16 = mul(x = hidden_states_303_cast_fp16, y = const_714_promoted_to_fp16)[name = string("op_13124_cast_fp16")]; + bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; + tensor input_373_cast_fp16 = concat(axis = var_13122, interleave = input_373_interleave_0, values = (hidden_states_303_cast_fp16, var_13124_cast_fp16))[name = string("input_373_cast_fp16")]; + tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; + fp16 var_13119_to_fp16 = const()[name = string("op_13119_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_13119_to_fp16, x = input_373_cast_fp16)[name = string("normed_449_cast_fp16")]; + tensor normed_451_begin_0 = const()[name = string("normed_451_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_451_end_0 = const()[name = string("normed_451_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_451_end_mask_0 = const()[name = string("normed_451_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_451_cast_fp16 = slice_by_index(begin = normed_451_begin_0, end = normed_451_end_0, end_mask = normed_451_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_451_cast_fp16")]; + tensor var_13138_to_fp16 = const()[name = string("op_13138_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736943936)))]; + tensor x_301_cast_fp16 = mul(x = normed_451_cast_fp16, y = var_13138_to_fp16)[name = string("x_301_cast_fp16")]; + tensor var_13150 = const()[name = string("op_13150"), val = tensor([0, 2, 1])]; + tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; + tensor var_13151_cast_fp16 = transpose(perm = var_13150, x = x_301_cast_fp16)[name = string("transpose_60")]; + tensor input_375_cast_fp16 = expand_dims(axes = input_375_axes_0, x = var_13151_cast_fp16)[name = string("input_375_cast_fp16")]; + string x_303_pad_type_0 = const()[name = string("x_303_pad_type_0"), val = string("valid")]; + tensor x_303_strides_0 = const()[name = string("x_303_strides_0"), val = tensor([1, 1])]; + tensor x_303_pad_0 = const()[name = string("x_303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_303_dilations_0 = const()[name = string("x_303_dilations_0"), val = tensor([1, 1])]; + int32 x_303_groups_0 = const()[name = string("x_303_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736946304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742918336))))[name = string("model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_303_cast_fp16 = conv(dilations = x_303_dilations_0, groups = x_303_groups_0, pad = x_303_pad_0, pad_type = x_303_pad_type_0, strides = x_303_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("x_303_cast_fp16")]; + string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; + tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; + tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; + int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743139584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749111616))))[name = string("model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_37_cast_fp16 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("b_37_cast_fp16")]; + string var_13176_mode_0 = const()[name = string("op_13176_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_13176_cast_fp16 = gelu(mode = var_13176_mode_0, x = x_303_cast_fp16)[name = string("op_13176_cast_fp16")]; + tensor input_377_cast_fp16 = mul(x = var_13176_cast_fp16, y = b_37_cast_fp16)[name = string("input_377_cast_fp16")]; + string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; + tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; + tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; + int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749332864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755304896))))[name = string("model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_37_cast_fp16 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_377_cast_fp16)[name = string("e_37_cast_fp16")]; + tensor var_13184_axes_0 = const()[name = string("op_13184_axes_0"), val = tensor([2])]; + tensor var_13184_cast_fp16 = squeeze(axes = var_13184_axes_0, x = e_37_cast_fp16)[name = string("op_13184_cast_fp16")]; + tensor var_13185 = const()[name = string("op_13185"), val = tensor([0, 2, 1])]; + int32 var_13196 = const()[name = string("op_13196"), val = int32(-1)]; + fp16 const_718_promoted_to_fp16 = const()[name = string("const_718_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_305_cast_fp16 = transpose(perm = var_13185, x = var_13184_cast_fp16)[name = string("transpose_59")]; + tensor var_13198_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = const_718_promoted_to_fp16)[name = string("op_13198_cast_fp16")]; + bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; + tensor input_379_cast_fp16 = concat(axis = var_13196, interleave = input_379_interleave_0, values = (hidden_states_305_cast_fp16, var_13198_cast_fp16))[name = string("input_379_cast_fp16")]; + tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; + fp16 var_13193_to_fp16 = const()[name = string("op_13193_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_13193_to_fp16, x = input_379_cast_fp16)[name = string("normed_453_cast_fp16")]; + tensor normed_455_begin_0 = const()[name = string("normed_455_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_455_end_0 = const()[name = string("normed_455_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_455_end_mask_0 = const()[name = string("normed_455_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_455_cast_fp16 = slice_by_index(begin = normed_455_begin_0, end = normed_455_end_0, end_mask = normed_455_end_mask_0, x = normed_453_cast_fp16)[name = string("normed_455_cast_fp16")]; + tensor var_13212_to_fp16 = const()[name = string("op_13212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755341824)))]; + tensor hidden_states_307_cast_fp16 = mul(x = normed_455_cast_fp16, y = var_13212_to_fp16)[name = string("hidden_states_307_cast_fp16")]; + tensor hidden_states_309_cast_fp16 = add(x = hidden_states_303_cast_fp16, y = hidden_states_307_cast_fp16)[name = string("hidden_states_309_cast_fp16")]; + int32 var_13263 = const()[name = string("op_13263"), val = int32(-1)]; + fp16 const_722_promoted_to_fp16 = const()[name = string("const_722_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13265_cast_fp16 = mul(x = hidden_states_309_cast_fp16, y = const_722_promoted_to_fp16)[name = string("op_13265_cast_fp16")]; + bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; + tensor input_381_cast_fp16 = concat(axis = var_13263, interleave = input_381_interleave_0, values = (hidden_states_309_cast_fp16, var_13265_cast_fp16))[name = string("input_381_cast_fp16")]; + tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; + fp16 var_13260_to_fp16 = const()[name = string("op_13260_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_13260_to_fp16, x = input_381_cast_fp16)[name = string("normed_457_cast_fp16")]; + tensor normed_459_begin_0 = const()[name = string("normed_459_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_459_end_0 = const()[name = string("normed_459_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_459_end_mask_0 = const()[name = string("normed_459_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_459_cast_fp16 = slice_by_index(begin = normed_459_begin_0, end = normed_459_end_0, end_mask = normed_459_end_mask_0, x = normed_457_cast_fp16)[name = string("normed_459_cast_fp16")]; + tensor var_13279_to_fp16 = const()[name = string("op_13279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755344192)))]; + tensor hidden_states_311_cast_fp16 = mul(x = normed_459_cast_fp16, y = var_13279_to_fp16)[name = string("hidden_states_311_cast_fp16")]; + tensor var_13284 = const()[name = string("op_13284"), val = tensor([0, 2, 1])]; + tensor var_13287_axes_0 = const()[name = string("op_13287_axes_0"), val = tensor([2])]; + tensor var_13285_cast_fp16 = transpose(perm = var_13284, x = hidden_states_311_cast_fp16)[name = string("transpose_58")]; + tensor var_13287_cast_fp16 = expand_dims(axes = var_13287_axes_0, x = var_13285_cast_fp16)[name = string("op_13287_cast_fp16")]; + string var_13303_pad_type_0 = const()[name = string("op_13303_pad_type_0"), val = string("valid")]; + tensor var_13303_strides_0 = const()[name = string("op_13303_strides_0"), val = tensor([1, 1])]; + tensor var_13303_pad_0 = const()[name = string("op_13303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13303_dilations_0 = const()[name = string("op_13303_dilations_0"), val = tensor([1, 1])]; + int32 var_13303_groups_0 = const()[name = string("op_13303_groups_0"), val = int32(1)]; + tensor var_13303 = conv(dilations = var_13303_dilations_0, groups = var_13303_groups_0, pad = var_13303_pad_0, pad_type = var_13303_pad_type_0, strides = var_13303_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_13287_cast_fp16)[name = string("op_13303")]; + tensor var_13308 = const()[name = string("op_13308"), val = tensor([1, 4, 1, 256])]; + tensor var_13309 = reshape(shape = var_13308, x = var_13303)[name = string("op_13309")]; + string var_13325_pad_type_0 = const()[name = string("op_13325_pad_type_0"), val = string("valid")]; + tensor var_13325_strides_0 = const()[name = string("op_13325_strides_0"), val = tensor([1, 1])]; + tensor var_13325_pad_0 = const()[name = string("op_13325_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13325_dilations_0 = const()[name = string("op_13325_dilations_0"), val = tensor([1, 1])]; + int32 var_13325_groups_0 = const()[name = string("op_13325_groups_0"), val = int32(1)]; + tensor var_13325 = conv(dilations = var_13325_dilations_0, groups = var_13325_groups_0, pad = var_13325_pad_0, pad_type = var_13325_pad_type_0, strides = var_13325_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_13287_cast_fp16)[name = string("op_13325")]; + tensor var_13330 = const()[name = string("op_13330"), val = tensor([1, 1, 1, 256])]; + tensor var_13331 = reshape(shape = var_13330, x = var_13325)[name = string("op_13331")]; + string var_13347_pad_type_0 = const()[name = string("op_13347_pad_type_0"), val = string("valid")]; + tensor var_13347_strides_0 = const()[name = string("op_13347_strides_0"), val = tensor([1, 1])]; + tensor var_13347_pad_0 = const()[name = string("op_13347_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13347_dilations_0 = const()[name = string("op_13347_dilations_0"), val = tensor([1, 1])]; + int32 var_13347_groups_0 = const()[name = string("op_13347_groups_0"), val = int32(1)]; + tensor var_13347 = conv(dilations = var_13347_dilations_0, groups = var_13347_groups_0, pad = var_13347_pad_0, pad_type = var_13347_pad_type_0, strides = var_13347_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_13287_cast_fp16)[name = string("op_13347")]; + tensor var_13352 = const()[name = string("op_13352"), val = tensor([1, 1, 1, 256])]; + tensor var_13353 = reshape(shape = var_13352, x = var_13347)[name = string("op_13353")]; + int32 var_13368 = const()[name = string("op_13368"), val = int32(-1)]; + fp16 const_726_promoted = const()[name = string("const_726_promoted"), val = fp16(-0x1p+0)]; + tensor var_13370 = mul(x = var_13309, y = const_726_promoted)[name = string("op_13370")]; + bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; + tensor input_385 = concat(axis = var_13368, interleave = input_385_interleave_0, values = (var_13309, var_13370))[name = string("input_385")]; + tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; + fp16 var_13365_to_fp16 = const()[name = string("op_13365_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_13365_to_fp16, x = input_385)[name = string("normed_461_cast_fp16")]; + tensor normed_463_begin_0 = const()[name = string("normed_463_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_463_end_0 = const()[name = string("normed_463_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_463_end_mask_0 = const()[name = string("normed_463_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_463 = slice_by_index(begin = normed_463_begin_0, end = normed_463_end_0, end_mask = normed_463_end_mask_0, x = normed_461_cast_fp16)[name = string("normed_463")]; + tensor var_13384_to_fp16 = const()[name = string("op_13384_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755346560)))]; + tensor q_39_cast_fp16 = mul(x = normed_463, y = var_13384_to_fp16)[name = string("q_39_cast_fp16")]; + int32 var_13395 = const()[name = string("op_13395"), val = int32(-1)]; + fp16 const_730_promoted = const()[name = string("const_730_promoted"), val = fp16(-0x1p+0)]; + tensor var_13397 = mul(x = var_13331, y = const_730_promoted)[name = string("op_13397")]; + bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; + tensor input_387 = concat(axis = var_13395, interleave = input_387_interleave_0, values = (var_13331, var_13397))[name = string("input_387")]; + tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; + fp16 var_13392_to_fp16 = const()[name = string("op_13392_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_13392_to_fp16, x = input_387)[name = string("normed_465_cast_fp16")]; + tensor normed_467_begin_0 = const()[name = string("normed_467_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_467_end_0 = const()[name = string("normed_467_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_467_end_mask_0 = const()[name = string("normed_467_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_467 = slice_by_index(begin = normed_467_begin_0, end = normed_467_end_0, end_mask = normed_467_end_mask_0, x = normed_465_cast_fp16)[name = string("normed_467")]; + tensor var_13411_to_fp16 = const()[name = string("op_13411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755347136)))]; + tensor k_39_cast_fp16 = mul(x = normed_467, y = var_13411_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_13413_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13413_cast_fp16")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; + fp16 const_736_promoted_to_fp16 = const()[name = string("const_736_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13434_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_736_promoted_to_fp16)[name = string("op_13434_cast_fp16")]; + int32 var_13436 = const()[name = string("op_13436"), val = int32(-1)]; + bool var_13437_interleave_0 = const()[name = string("op_13437_interleave_0"), val = bool(false)]; + tensor var_13437_cast_fp16 = concat(axis = var_13436, interleave = var_13437_interleave_0, values = (var_13434_cast_fp16, x1_77_cast_fp16))[name = string("op_13437_cast_fp16")]; + tensor var_13438_cast_fp16 = mul(x = var_13437_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13438_cast_fp16")]; + tensor query_states_77_cast_fp16 = add(x = var_13413_cast_fp16, y = var_13438_cast_fp16)[name = string("query_states_77_cast_fp16")]; + tensor var_13441_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13441_cast_fp16")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; + fp16 const_739_promoted_to_fp16 = const()[name = string("const_739_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13462_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_739_promoted_to_fp16)[name = string("op_13462_cast_fp16")]; + int32 var_13464 = const()[name = string("op_13464"), val = int32(-1)]; + bool var_13465_interleave_0 = const()[name = string("op_13465_interleave_0"), val = bool(false)]; + tensor var_13465_cast_fp16 = concat(axis = var_13464, interleave = var_13465_interleave_0, values = (var_13462_cast_fp16, x1_79_cast_fp16))[name = string("op_13465_cast_fp16")]; + tensor var_13466_cast_fp16 = mul(x = var_13465_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13466_cast_fp16")]; + tensor key_states_77_cast_fp16 = add(x = var_13441_cast_fp16, y = var_13466_cast_fp16)[name = string("key_states_77_cast_fp16")]; + tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([16])]; + tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; + tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; + tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([17])]; + int32 concat_154_axis_0 = const()[name = string("concat_154_axis_0"), val = int32(0)]; + bool concat_154_interleave_0 = const()[name = string("concat_154_interleave_0"), val = bool(false)]; + tensor concat_154 = concat(axis = concat_154_axis_0, interleave = concat_154_interleave_0, values = (expand_dims_228, expand_dims_229, current_pos, expand_dims_231))[name = string("concat_154")]; + tensor concat_155_values1_0 = const()[name = string("concat_155_values1_0"), val = tensor([0])]; + tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; + int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; + bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; + tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_232, concat_155_values1_0, var_1955, concat_155_values3_0))[name = string("concat_155")]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_154, begin_mask = model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0, end = concat_155, end_mask = model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_33_stride_0, update = key_states_77_cast_fp16, x = coreml_update_state_89)[name = string("model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_38_write_state")]; + tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_38")]; + tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([38])]; + tensor expand_dims_235 = const()[name = string("expand_dims_235"), val = tensor([0])]; + tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; + tensor expand_dims_238 = const()[name = string("expand_dims_238"), val = tensor([39])]; + int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; + bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; + tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (expand_dims_234, expand_dims_235, current_pos, expand_dims_237))[name = string("concat_158")]; + tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; + tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; + int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; + bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; + tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (expand_dims_238, concat_159_values1_0, var_1955, concat_159_values3_0))[name = string("concat_159")]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_158, begin_mask = model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0, end = concat_159, end_mask = model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_34_stride_0, update = var_13353, x = coreml_update_state_90)[name = string("model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_39_write_state")]; + tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_39")]; + tensor var_13521_begin_0 = const()[name = string("op_13521_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_13521_end_0 = const()[name = string("op_13521_end_0"), val = tensor([17, 1, 512, 256])]; + tensor var_13521_end_mask_0 = const()[name = string("op_13521_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13521_cast_fp16 = slice_by_index(begin = var_13521_begin_0, end = var_13521_end_0, end_mask = var_13521_end_mask_0, x = coreml_update_state_91)[name = string("op_13521_cast_fp16")]; + tensor var_13528_begin_0 = const()[name = string("op_13528_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor var_13528_end_0 = const()[name = string("op_13528_end_0"), val = tensor([39, 1, 512, 256])]; + tensor var_13528_end_mask_0 = const()[name = string("op_13528_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13528_cast_fp16 = slice_by_index(begin = var_13528_begin_0, end = var_13528_end_0, end_mask = var_13528_end_mask_0, x = coreml_update_state_91)[name = string("op_13528_cast_fp16")]; + tensor var_13565 = const()[name = string("op_13565"), val = tensor([1, 4, 1, 1])]; + tensor x_309_cast_fp16 = tile(reps = var_13565, x = var_13521_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor var_13585 = const()[name = string("op_13585"), val = tensor([1, 4, 1, 1])]; + tensor x_315_cast_fp16 = tile(reps = var_13585, x = var_13528_cast_fp16)[name = string("x_315_cast_fp16")]; + bool var_13612_transpose_x_1 = const()[name = string("op_13612_transpose_x_1"), val = bool(false)]; + bool var_13612_transpose_y_1 = const()[name = string("op_13612_transpose_y_1"), val = bool(true)]; + tensor var_13612 = matmul(transpose_x = var_13612_transpose_x_1, transpose_y = var_13612_transpose_y_1, x = query_states_77_cast_fp16, y = x_309_cast_fp16)[name = string("op_13612")]; + fp16 var_13613_to_fp16 = const()[name = string("op_13613_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_115_cast_fp16 = mul(x = var_13612, y = var_13613_to_fp16)[name = string("attn_weights_115_cast_fp16")]; + tensor attn_weights_117_cast_fp16 = add(x = attn_weights_115_cast_fp16, y = var_2129)[name = string("attn_weights_117_cast_fp16")]; + int32 var_13648 = const()[name = string("op_13648"), val = int32(-1)]; + tensor attn_weights_119_cast_fp16 = softmax(axis = var_13648, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; + bool attn_output_191_transpose_x_0 = const()[name = string("attn_output_191_transpose_x_0"), val = bool(false)]; + bool attn_output_191_transpose_y_0 = const()[name = string("attn_output_191_transpose_y_0"), val = bool(false)]; + tensor attn_output_191_cast_fp16 = matmul(transpose_x = attn_output_191_transpose_x_0, transpose_y = attn_output_191_transpose_y_0, x = attn_weights_119_cast_fp16, y = x_315_cast_fp16)[name = string("attn_output_191_cast_fp16")]; + tensor var_13659_perm_0 = const()[name = string("op_13659_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13663 = const()[name = string("op_13663"), val = tensor([1, 1, 1024])]; + tensor var_13659_cast_fp16 = transpose(perm = var_13659_perm_0, x = attn_output_191_cast_fp16)[name = string("transpose_57")]; + tensor attn_output_195_cast_fp16 = reshape(shape = var_13663, x = var_13659_cast_fp16)[name = string("attn_output_195_cast_fp16")]; + tensor var_13668 = const()[name = string("op_13668"), val = tensor([0, 2, 1])]; + string var_13684_pad_type_0 = const()[name = string("op_13684_pad_type_0"), val = string("valid")]; + int32 var_13684_groups_0 = const()[name = string("op_13684_groups_0"), val = int32(1)]; + tensor var_13684_strides_0 = const()[name = string("op_13684_strides_0"), val = tensor([1])]; + tensor var_13684_pad_0 = const()[name = string("op_13684_pad_0"), val = tensor([0, 0])]; + tensor var_13684_dilations_0 = const()[name = string("op_13684_dilations_0"), val = tensor([1])]; + tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755347712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756232512))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13669_cast_fp16 = transpose(perm = var_13668, x = attn_output_195_cast_fp16)[name = string("transpose_56")]; + tensor var_13684_cast_fp16 = conv(dilations = var_13684_dilations_0, groups = var_13684_groups_0, pad = var_13684_pad_0, pad_type = var_13684_pad_type_0, strides = var_13684_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_13669_cast_fp16)[name = string("op_13684_cast_fp16")]; + tensor var_13688 = const()[name = string("op_13688"), val = tensor([0, 2, 1])]; + int32 var_13699 = const()[name = string("op_13699"), val = int32(-1)]; + fp16 const_748_promoted_to_fp16 = const()[name = string("const_748_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_317_cast_fp16 = transpose(perm = var_13688, x = var_13684_cast_fp16)[name = string("transpose_55")]; + tensor var_13701_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = const_748_promoted_to_fp16)[name = string("op_13701_cast_fp16")]; + bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; + tensor input_391_cast_fp16 = concat(axis = var_13699, interleave = input_391_interleave_0, values = (hidden_states_317_cast_fp16, var_13701_cast_fp16))[name = string("input_391_cast_fp16")]; + tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; + fp16 var_13696_to_fp16 = const()[name = string("op_13696_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_13696_to_fp16, x = input_391_cast_fp16)[name = string("normed_469_cast_fp16")]; + tensor normed_471_begin_0 = const()[name = string("normed_471_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_471_end_0 = const()[name = string("normed_471_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_471_end_mask_0 = const()[name = string("normed_471_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_471_cast_fp16 = slice_by_index(begin = normed_471_begin_0, end = normed_471_end_0, end_mask = normed_471_end_mask_0, x = normed_469_cast_fp16)[name = string("normed_471_cast_fp16")]; + tensor var_13715_to_fp16 = const()[name = string("op_13715_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756269440)))]; + tensor attn_output_199_cast_fp16 = mul(x = normed_471_cast_fp16, y = var_13715_to_fp16)[name = string("attn_output_199_cast_fp16")]; + tensor hidden_states_319_cast_fp16 = add(x = hidden_states_309_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; + int32 var_13728 = const()[name = string("op_13728"), val = int32(-1)]; + fp16 const_752_promoted_to_fp16 = const()[name = string("const_752_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13730_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = const_752_promoted_to_fp16)[name = string("op_13730_cast_fp16")]; + bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; + tensor input_393_cast_fp16 = concat(axis = var_13728, interleave = input_393_interleave_0, values = (hidden_states_319_cast_fp16, var_13730_cast_fp16))[name = string("input_393_cast_fp16")]; + tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; + fp16 var_13725_to_fp16 = const()[name = string("op_13725_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_13725_to_fp16, x = input_393_cast_fp16)[name = string("normed_473_cast_fp16")]; + tensor normed_475_begin_0 = const()[name = string("normed_475_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_475_end_0 = const()[name = string("normed_475_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_475_end_mask_0 = const()[name = string("normed_475_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_475_cast_fp16 = slice_by_index(begin = normed_475_begin_0, end = normed_475_end_0, end_mask = normed_475_end_mask_0, x = normed_473_cast_fp16)[name = string("normed_475_cast_fp16")]; + tensor var_13744_to_fp16 = const()[name = string("op_13744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756271808)))]; + tensor x_317_cast_fp16 = mul(x = normed_475_cast_fp16, y = var_13744_to_fp16)[name = string("x_317_cast_fp16")]; + tensor var_13756 = const()[name = string("op_13756"), val = tensor([0, 2, 1])]; + tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; + tensor var_13757_cast_fp16 = transpose(perm = var_13756, x = x_317_cast_fp16)[name = string("transpose_54")]; + tensor input_395_cast_fp16 = expand_dims(axes = input_395_axes_0, x = var_13757_cast_fp16)[name = string("input_395_cast_fp16")]; + string x_319_pad_type_0 = const()[name = string("x_319_pad_type_0"), val = string("valid")]; + tensor x_319_strides_0 = const()[name = string("x_319_strides_0"), val = tensor([1, 1])]; + tensor x_319_pad_0 = const()[name = string("x_319_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_319_dilations_0 = const()[name = string("x_319_dilations_0"), val = tensor([1, 1])]; + int32 x_319_groups_0 = const()[name = string("x_319_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756274176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(762246208))))[name = string("model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_319_cast_fp16 = conv(dilations = x_319_dilations_0, groups = x_319_groups_0, pad = x_319_pad_0, pad_type = x_319_pad_type_0, strides = x_319_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("x_319_cast_fp16")]; + string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; + tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; + tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; + int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(762467456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768439488))))[name = string("model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_39_cast_fp16 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("b_39_cast_fp16")]; + string var_13782_mode_0 = const()[name = string("op_13782_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_13782_cast_fp16 = gelu(mode = var_13782_mode_0, x = x_319_cast_fp16)[name = string("op_13782_cast_fp16")]; + tensor input_397_cast_fp16 = mul(x = var_13782_cast_fp16, y = b_39_cast_fp16)[name = string("input_397_cast_fp16")]; + string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; + tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; + tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; + int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768660736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774632768))))[name = string("model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_39_cast_fp16 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("e_39_cast_fp16")]; + tensor var_13790_axes_0 = const()[name = string("op_13790_axes_0"), val = tensor([2])]; + tensor var_13790_cast_fp16 = squeeze(axes = var_13790_axes_0, x = e_39_cast_fp16)[name = string("op_13790_cast_fp16")]; + tensor var_13791 = const()[name = string("op_13791"), val = tensor([0, 2, 1])]; + int32 var_13802 = const()[name = string("op_13802"), val = int32(-1)]; + fp16 const_756_promoted_to_fp16 = const()[name = string("const_756_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_321_cast_fp16 = transpose(perm = var_13791, x = var_13790_cast_fp16)[name = string("transpose_53")]; + tensor var_13804_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = const_756_promoted_to_fp16)[name = string("op_13804_cast_fp16")]; + bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; + tensor input_399_cast_fp16 = concat(axis = var_13802, interleave = input_399_interleave_0, values = (hidden_states_321_cast_fp16, var_13804_cast_fp16))[name = string("input_399_cast_fp16")]; + tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; + fp16 var_13799_to_fp16 = const()[name = string("op_13799_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_13799_to_fp16, x = input_399_cast_fp16)[name = string("normed_477_cast_fp16")]; + tensor normed_479_begin_0 = const()[name = string("normed_479_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_479_end_0 = const()[name = string("normed_479_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_479_end_mask_0 = const()[name = string("normed_479_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_479_cast_fp16 = slice_by_index(begin = normed_479_begin_0, end = normed_479_end_0, end_mask = normed_479_end_mask_0, x = normed_477_cast_fp16)[name = string("normed_479_cast_fp16")]; + tensor var_13818_to_fp16 = const()[name = string("op_13818_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774669696)))]; + tensor hidden_states_323_cast_fp16 = mul(x = normed_479_cast_fp16, y = var_13818_to_fp16)[name = string("hidden_states_323_cast_fp16")]; + tensor hidden_states_325_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = hidden_states_323_cast_fp16)[name = string("hidden_states_325_cast_fp16")]; + int32 var_13869 = const()[name = string("op_13869"), val = int32(-1)]; + fp16 const_760_promoted_to_fp16 = const()[name = string("const_760_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13871_cast_fp16 = mul(x = hidden_states_325_cast_fp16, y = const_760_promoted_to_fp16)[name = string("op_13871_cast_fp16")]; + bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; + tensor input_401_cast_fp16 = concat(axis = var_13869, interleave = input_401_interleave_0, values = (hidden_states_325_cast_fp16, var_13871_cast_fp16))[name = string("input_401_cast_fp16")]; + tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; + fp16 var_13866_to_fp16 = const()[name = string("op_13866_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_13866_to_fp16, x = input_401_cast_fp16)[name = string("normed_481_cast_fp16")]; + tensor normed_483_begin_0 = const()[name = string("normed_483_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_483_end_0 = const()[name = string("normed_483_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_483_end_mask_0 = const()[name = string("normed_483_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_483_cast_fp16 = slice_by_index(begin = normed_483_begin_0, end = normed_483_end_0, end_mask = normed_483_end_mask_0, x = normed_481_cast_fp16)[name = string("normed_483_cast_fp16")]; + tensor var_13885_to_fp16 = const()[name = string("op_13885_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774672064)))]; + tensor hidden_states_327_cast_fp16 = mul(x = normed_483_cast_fp16, y = var_13885_to_fp16)[name = string("hidden_states_327_cast_fp16")]; + tensor var_13890 = const()[name = string("op_13890"), val = tensor([0, 2, 1])]; + tensor var_13893_axes_0 = const()[name = string("op_13893_axes_0"), val = tensor([2])]; + tensor var_13891_cast_fp16 = transpose(perm = var_13890, x = hidden_states_327_cast_fp16)[name = string("transpose_52")]; + tensor var_13893_cast_fp16 = expand_dims(axes = var_13893_axes_0, x = var_13891_cast_fp16)[name = string("op_13893_cast_fp16")]; + string var_13909_pad_type_0 = const()[name = string("op_13909_pad_type_0"), val = string("valid")]; + tensor var_13909_strides_0 = const()[name = string("op_13909_strides_0"), val = tensor([1, 1])]; + tensor var_13909_pad_0 = const()[name = string("op_13909_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13909_dilations_0 = const()[name = string("op_13909_dilations_0"), val = tensor([1, 1])]; + int32 var_13909_groups_0 = const()[name = string("op_13909_groups_0"), val = int32(1)]; + tensor var_13909 = conv(dilations = var_13909_dilations_0, groups = var_13909_groups_0, pad = var_13909_pad_0, pad_type = var_13909_pad_type_0, strides = var_13909_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_13893_cast_fp16)[name = string("op_13909")]; + tensor var_13914 = const()[name = string("op_13914"), val = tensor([1, 4, 1, 256])]; + tensor var_13915 = reshape(shape = var_13914, x = var_13909)[name = string("op_13915")]; + string var_13931_pad_type_0 = const()[name = string("op_13931_pad_type_0"), val = string("valid")]; + tensor var_13931_strides_0 = const()[name = string("op_13931_strides_0"), val = tensor([1, 1])]; + tensor var_13931_pad_0 = const()[name = string("op_13931_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13931_dilations_0 = const()[name = string("op_13931_dilations_0"), val = tensor([1, 1])]; + int32 var_13931_groups_0 = const()[name = string("op_13931_groups_0"), val = int32(1)]; + tensor var_13931 = conv(dilations = var_13931_dilations_0, groups = var_13931_groups_0, pad = var_13931_pad_0, pad_type = var_13931_pad_type_0, strides = var_13931_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_13893_cast_fp16)[name = string("op_13931")]; + tensor var_13936 = const()[name = string("op_13936"), val = tensor([1, 1, 1, 256])]; + tensor var_13937 = reshape(shape = var_13936, x = var_13931)[name = string("op_13937")]; + string var_13953_pad_type_0 = const()[name = string("op_13953_pad_type_0"), val = string("valid")]; + tensor var_13953_strides_0 = const()[name = string("op_13953_strides_0"), val = tensor([1, 1])]; + tensor var_13953_pad_0 = const()[name = string("op_13953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13953_dilations_0 = const()[name = string("op_13953_dilations_0"), val = tensor([1, 1])]; + int32 var_13953_groups_0 = const()[name = string("op_13953_groups_0"), val = int32(1)]; + tensor var_13953 = conv(dilations = var_13953_dilations_0, groups = var_13953_groups_0, pad = var_13953_pad_0, pad_type = var_13953_pad_type_0, strides = var_13953_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_13893_cast_fp16)[name = string("op_13953")]; + tensor var_13958 = const()[name = string("op_13958"), val = tensor([1, 1, 1, 256])]; + tensor var_13959 = reshape(shape = var_13958, x = var_13953)[name = string("op_13959")]; + int32 var_13974 = const()[name = string("op_13974"), val = int32(-1)]; + fp16 const_764_promoted = const()[name = string("const_764_promoted"), val = fp16(-0x1p+0)]; + tensor var_13976 = mul(x = var_13915, y = const_764_promoted)[name = string("op_13976")]; + bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; + tensor input_405 = concat(axis = var_13974, interleave = input_405_interleave_0, values = (var_13915, var_13976))[name = string("input_405")]; + tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; + fp16 var_13971_to_fp16 = const()[name = string("op_13971_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_13971_to_fp16, x = input_405)[name = string("normed_485_cast_fp16")]; + tensor normed_487_begin_0 = const()[name = string("normed_487_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_487_end_0 = const()[name = string("normed_487_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_487_end_mask_0 = const()[name = string("normed_487_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_487 = slice_by_index(begin = normed_487_begin_0, end = normed_487_end_0, end_mask = normed_487_end_mask_0, x = normed_485_cast_fp16)[name = string("normed_487")]; + tensor var_13990_to_fp16 = const()[name = string("op_13990_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774674432)))]; + tensor q_41_cast_fp16 = mul(x = normed_487, y = var_13990_to_fp16)[name = string("q_41_cast_fp16")]; + int32 var_14001 = const()[name = string("op_14001"), val = int32(-1)]; + fp16 const_768_promoted = const()[name = string("const_768_promoted"), val = fp16(-0x1p+0)]; + tensor var_14003 = mul(x = var_13937, y = const_768_promoted)[name = string("op_14003")]; + bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; + tensor input_407 = concat(axis = var_14001, interleave = input_407_interleave_0, values = (var_13937, var_14003))[name = string("input_407")]; + tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; + fp16 var_13998_to_fp16 = const()[name = string("op_13998_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_13998_to_fp16, x = input_407)[name = string("normed_489_cast_fp16")]; + tensor normed_491_begin_0 = const()[name = string("normed_491_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_491_end_0 = const()[name = string("normed_491_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_491_end_mask_0 = const()[name = string("normed_491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_491 = slice_by_index(begin = normed_491_begin_0, end = normed_491_end_0, end_mask = normed_491_end_mask_0, x = normed_489_cast_fp16)[name = string("normed_491")]; + tensor var_14017_to_fp16 = const()[name = string("op_14017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774675008)))]; + tensor k_41_cast_fp16 = mul(x = normed_491, y = var_14017_to_fp16)[name = string("k_41_cast_fp16")]; + tensor var_14019_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14019_cast_fp16")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; + fp16 const_774_promoted_to_fp16 = const()[name = string("const_774_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14040_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_774_promoted_to_fp16)[name = string("op_14040_cast_fp16")]; + int32 var_14042 = const()[name = string("op_14042"), val = int32(-1)]; + bool var_14043_interleave_0 = const()[name = string("op_14043_interleave_0"), val = bool(false)]; + tensor var_14043_cast_fp16 = concat(axis = var_14042, interleave = var_14043_interleave_0, values = (var_14040_cast_fp16, x1_81_cast_fp16))[name = string("op_14043_cast_fp16")]; + tensor var_14044_cast_fp16 = mul(x = var_14043_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14044_cast_fp16")]; + tensor query_states_81_cast_fp16 = add(x = var_14019_cast_fp16, y = var_14044_cast_fp16)[name = string("query_states_81_cast_fp16")]; + tensor var_14047_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14047_cast_fp16")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; + fp16 const_777_promoted_to_fp16 = const()[name = string("const_777_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14068_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_777_promoted_to_fp16)[name = string("op_14068_cast_fp16")]; + int32 var_14070 = const()[name = string("op_14070"), val = int32(-1)]; + bool var_14071_interleave_0 = const()[name = string("op_14071_interleave_0"), val = bool(false)]; + tensor var_14071_cast_fp16 = concat(axis = var_14070, interleave = var_14071_interleave_0, values = (var_14068_cast_fp16, x1_83_cast_fp16))[name = string("op_14071_cast_fp16")]; + tensor var_14072_cast_fp16 = mul(x = var_14071_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14072_cast_fp16")]; + tensor key_states_81_cast_fp16 = add(x = var_14047_cast_fp16, y = var_14072_cast_fp16)[name = string("key_states_81_cast_fp16")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([17])]; + tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; + tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; + tensor expand_dims_244 = const()[name = string("expand_dims_244"), val = tensor([18])]; + int32 concat_162_axis_0 = const()[name = string("concat_162_axis_0"), val = int32(0)]; + bool concat_162_interleave_0 = const()[name = string("concat_162_interleave_0"), val = bool(false)]; + tensor concat_162 = concat(axis = concat_162_axis_0, interleave = concat_162_interleave_0, values = (expand_dims_240, expand_dims_241, current_pos, expand_dims_243))[name = string("concat_162")]; + tensor concat_163_values1_0 = const()[name = string("concat_163_values1_0"), val = tensor([0])]; + tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; + int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; + bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; + tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_244, concat_163_values1_0, var_1955, concat_163_values3_0))[name = string("concat_163")]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_162, begin_mask = model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0, end = concat_163, end_mask = model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_35_stride_0, update = key_states_81_cast_fp16, x = coreml_update_state_91)[name = string("model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_40_write_state")]; + tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_40")]; + tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([39])]; + tensor expand_dims_247 = const()[name = string("expand_dims_247"), val = tensor([0])]; + tensor expand_dims_249 = const()[name = string("expand_dims_249"), val = tensor([0])]; + tensor expand_dims_250 = const()[name = string("expand_dims_250"), val = tensor([40])]; + int32 concat_166_axis_0 = const()[name = string("concat_166_axis_0"), val = int32(0)]; + bool concat_166_interleave_0 = const()[name = string("concat_166_interleave_0"), val = bool(false)]; + tensor concat_166 = concat(axis = concat_166_axis_0, interleave = concat_166_interleave_0, values = (expand_dims_246, expand_dims_247, current_pos, expand_dims_249))[name = string("concat_166")]; + tensor concat_167_values1_0 = const()[name = string("concat_167_values1_0"), val = tensor([0])]; + tensor concat_167_values3_0 = const()[name = string("concat_167_values3_0"), val = tensor([0])]; + int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; + bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; + tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (expand_dims_250, concat_167_values1_0, var_1955, concat_167_values3_0))[name = string("concat_167")]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_166, begin_mask = model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0, end = concat_167, end_mask = model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_36_stride_0, update = var_13959, x = coreml_update_state_92)[name = string("model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_41_write_state")]; + tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_41")]; + tensor var_14127_begin_0 = const()[name = string("op_14127_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_14127_end_0 = const()[name = string("op_14127_end_0"), val = tensor([18, 1, 512, 256])]; + tensor var_14127_end_mask_0 = const()[name = string("op_14127_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14127_cast_fp16 = slice_by_index(begin = var_14127_begin_0, end = var_14127_end_0, end_mask = var_14127_end_mask_0, x = coreml_update_state_93)[name = string("op_14127_cast_fp16")]; + tensor var_14134_begin_0 = const()[name = string("op_14134_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor var_14134_end_0 = const()[name = string("op_14134_end_0"), val = tensor([40, 1, 512, 256])]; + tensor var_14134_end_mask_0 = const()[name = string("op_14134_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14134_cast_fp16 = slice_by_index(begin = var_14134_begin_0, end = var_14134_end_0, end_mask = var_14134_end_mask_0, x = coreml_update_state_93)[name = string("op_14134_cast_fp16")]; + tensor var_14171 = const()[name = string("op_14171"), val = tensor([1, 4, 1, 1])]; + tensor x_325_cast_fp16 = tile(reps = var_14171, x = var_14127_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_14191 = const()[name = string("op_14191"), val = tensor([1, 4, 1, 1])]; + tensor x_331_cast_fp16 = tile(reps = var_14191, x = var_14134_cast_fp16)[name = string("x_331_cast_fp16")]; + bool var_14218_transpose_x_1 = const()[name = string("op_14218_transpose_x_1"), val = bool(false)]; + bool var_14218_transpose_y_1 = const()[name = string("op_14218_transpose_y_1"), val = bool(true)]; + tensor var_14218 = matmul(transpose_x = var_14218_transpose_x_1, transpose_y = var_14218_transpose_y_1, x = query_states_81_cast_fp16, y = x_325_cast_fp16)[name = string("op_14218")]; + fp16 var_14219_to_fp16 = const()[name = string("op_14219_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_121_cast_fp16 = mul(x = var_14218, y = var_14219_to_fp16)[name = string("attn_weights_121_cast_fp16")]; + tensor attn_weights_123_cast_fp16 = add(x = attn_weights_121_cast_fp16, y = var_2129)[name = string("attn_weights_123_cast_fp16")]; + int32 var_14254 = const()[name = string("op_14254"), val = int32(-1)]; + tensor attn_weights_125_cast_fp16 = softmax(axis = var_14254, x = attn_weights_123_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; + bool attn_output_201_transpose_x_0 = const()[name = string("attn_output_201_transpose_x_0"), val = bool(false)]; + bool attn_output_201_transpose_y_0 = const()[name = string("attn_output_201_transpose_y_0"), val = bool(false)]; + tensor attn_output_201_cast_fp16 = matmul(transpose_x = attn_output_201_transpose_x_0, transpose_y = attn_output_201_transpose_y_0, x = attn_weights_125_cast_fp16, y = x_331_cast_fp16)[name = string("attn_output_201_cast_fp16")]; + tensor var_14265_perm_0 = const()[name = string("op_14265_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14269 = const()[name = string("op_14269"), val = tensor([1, 1, 1024])]; + tensor var_14265_cast_fp16 = transpose(perm = var_14265_perm_0, x = attn_output_201_cast_fp16)[name = string("transpose_51")]; + tensor attn_output_205_cast_fp16 = reshape(shape = var_14269, x = var_14265_cast_fp16)[name = string("attn_output_205_cast_fp16")]; + tensor var_14274 = const()[name = string("op_14274"), val = tensor([0, 2, 1])]; + string var_14290_pad_type_0 = const()[name = string("op_14290_pad_type_0"), val = string("valid")]; + int32 var_14290_groups_0 = const()[name = string("op_14290_groups_0"), val = int32(1)]; + tensor var_14290_strides_0 = const()[name = string("op_14290_strides_0"), val = tensor([1])]; + tensor var_14290_pad_0 = const()[name = string("op_14290_pad_0"), val = tensor([0, 0])]; + tensor var_14290_dilations_0 = const()[name = string("op_14290_dilations_0"), val = tensor([1])]; + tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774675584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775560384))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14275_cast_fp16 = transpose(perm = var_14274, x = attn_output_205_cast_fp16)[name = string("transpose_50")]; + tensor var_14290_cast_fp16 = conv(dilations = var_14290_dilations_0, groups = var_14290_groups_0, pad = var_14290_pad_0, pad_type = var_14290_pad_type_0, strides = var_14290_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_14275_cast_fp16)[name = string("op_14290_cast_fp16")]; + tensor var_14294 = const()[name = string("op_14294"), val = tensor([0, 2, 1])]; + int32 var_14305 = const()[name = string("op_14305"), val = int32(-1)]; + fp16 const_786_promoted_to_fp16 = const()[name = string("const_786_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_333_cast_fp16 = transpose(perm = var_14294, x = var_14290_cast_fp16)[name = string("transpose_49")]; + tensor var_14307_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = const_786_promoted_to_fp16)[name = string("op_14307_cast_fp16")]; + bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; + tensor input_411_cast_fp16 = concat(axis = var_14305, interleave = input_411_interleave_0, values = (hidden_states_333_cast_fp16, var_14307_cast_fp16))[name = string("input_411_cast_fp16")]; + tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; + fp16 var_14302_to_fp16 = const()[name = string("op_14302_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_14302_to_fp16, x = input_411_cast_fp16)[name = string("normed_493_cast_fp16")]; + tensor normed_495_begin_0 = const()[name = string("normed_495_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_495_end_0 = const()[name = string("normed_495_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_495_end_mask_0 = const()[name = string("normed_495_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_495_cast_fp16 = slice_by_index(begin = normed_495_begin_0, end = normed_495_end_0, end_mask = normed_495_end_mask_0, x = normed_493_cast_fp16)[name = string("normed_495_cast_fp16")]; + tensor var_14321_to_fp16 = const()[name = string("op_14321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775597312)))]; + tensor attn_output_209_cast_fp16 = mul(x = normed_495_cast_fp16, y = var_14321_to_fp16)[name = string("attn_output_209_cast_fp16")]; + tensor hidden_states_335_cast_fp16 = add(x = hidden_states_325_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_335_cast_fp16")]; + int32 var_14334 = const()[name = string("op_14334"), val = int32(-1)]; + fp16 const_790_promoted_to_fp16 = const()[name = string("const_790_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14336_cast_fp16 = mul(x = hidden_states_335_cast_fp16, y = const_790_promoted_to_fp16)[name = string("op_14336_cast_fp16")]; + bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; + tensor input_413_cast_fp16 = concat(axis = var_14334, interleave = input_413_interleave_0, values = (hidden_states_335_cast_fp16, var_14336_cast_fp16))[name = string("input_413_cast_fp16")]; + tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; + fp16 var_14331_to_fp16 = const()[name = string("op_14331_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_14331_to_fp16, x = input_413_cast_fp16)[name = string("normed_497_cast_fp16")]; + tensor normed_499_begin_0 = const()[name = string("normed_499_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_499_end_0 = const()[name = string("normed_499_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_499_end_mask_0 = const()[name = string("normed_499_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_499_cast_fp16 = slice_by_index(begin = normed_499_begin_0, end = normed_499_end_0, end_mask = normed_499_end_mask_0, x = normed_497_cast_fp16)[name = string("normed_499_cast_fp16")]; + tensor var_14350_to_fp16 = const()[name = string("op_14350_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775599680)))]; + tensor x_333_cast_fp16 = mul(x = normed_499_cast_fp16, y = var_14350_to_fp16)[name = string("x_333_cast_fp16")]; + tensor var_14362 = const()[name = string("op_14362"), val = tensor([0, 2, 1])]; + tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; + tensor var_14363_cast_fp16 = transpose(perm = var_14362, x = x_333_cast_fp16)[name = string("transpose_48")]; + tensor input_415_cast_fp16 = expand_dims(axes = input_415_axes_0, x = var_14363_cast_fp16)[name = string("input_415_cast_fp16")]; + string x_335_pad_type_0 = const()[name = string("x_335_pad_type_0"), val = string("valid")]; + tensor x_335_strides_0 = const()[name = string("x_335_strides_0"), val = tensor([1, 1])]; + tensor x_335_pad_0 = const()[name = string("x_335_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_335_dilations_0 = const()[name = string("x_335_dilations_0"), val = tensor([1, 1])]; + int32 x_335_groups_0 = const()[name = string("x_335_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775602048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781574080))))[name = string("model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_335_cast_fp16 = conv(dilations = x_335_dilations_0, groups = x_335_groups_0, pad = x_335_pad_0, pad_type = x_335_pad_type_0, strides = x_335_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("x_335_cast_fp16")]; + string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; + tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; + tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; + int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781795328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787767360))))[name = string("model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_41_cast_fp16 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("b_41_cast_fp16")]; + string var_14388_mode_0 = const()[name = string("op_14388_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_14388_cast_fp16 = gelu(mode = var_14388_mode_0, x = x_335_cast_fp16)[name = string("op_14388_cast_fp16")]; + tensor input_417_cast_fp16 = mul(x = var_14388_cast_fp16, y = b_41_cast_fp16)[name = string("input_417_cast_fp16")]; + string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; + tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; + tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; + int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787988608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793960640))))[name = string("model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_41_cast_fp16 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_417_cast_fp16)[name = string("e_41_cast_fp16")]; + tensor var_14396_axes_0 = const()[name = string("op_14396_axes_0"), val = tensor([2])]; + tensor var_14396_cast_fp16 = squeeze(axes = var_14396_axes_0, x = e_41_cast_fp16)[name = string("op_14396_cast_fp16")]; + tensor var_14397 = const()[name = string("op_14397"), val = tensor([0, 2, 1])]; + int32 var_14408 = const()[name = string("op_14408"), val = int32(-1)]; + fp16 const_794_promoted_to_fp16 = const()[name = string("const_794_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_337_cast_fp16 = transpose(perm = var_14397, x = var_14396_cast_fp16)[name = string("transpose_47")]; + tensor var_14410_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = const_794_promoted_to_fp16)[name = string("op_14410_cast_fp16")]; + bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; + tensor input_419_cast_fp16 = concat(axis = var_14408, interleave = input_419_interleave_0, values = (hidden_states_337_cast_fp16, var_14410_cast_fp16))[name = string("input_419_cast_fp16")]; + tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; + fp16 var_14405_to_fp16 = const()[name = string("op_14405_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_14405_to_fp16, x = input_419_cast_fp16)[name = string("normed_501_cast_fp16")]; + tensor normed_503_begin_0 = const()[name = string("normed_503_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_503_end_0 = const()[name = string("normed_503_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_503_end_mask_0 = const()[name = string("normed_503_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_503_cast_fp16 = slice_by_index(begin = normed_503_begin_0, end = normed_503_end_0, end_mask = normed_503_end_mask_0, x = normed_501_cast_fp16)[name = string("normed_503_cast_fp16")]; + tensor var_14424_to_fp16 = const()[name = string("op_14424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793997568)))]; + tensor hidden_states_339_cast_fp16 = mul(x = normed_503_cast_fp16, y = var_14424_to_fp16)[name = string("hidden_states_339_cast_fp16")]; + tensor hidden_states_341_cast_fp16 = add(x = hidden_states_335_cast_fp16, y = hidden_states_339_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + int32 var_14475 = const()[name = string("op_14475"), val = int32(-1)]; + fp16 const_798_promoted_to_fp16 = const()[name = string("const_798_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14477_cast_fp16 = mul(x = hidden_states_341_cast_fp16, y = const_798_promoted_to_fp16)[name = string("op_14477_cast_fp16")]; + bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; + tensor input_421_cast_fp16 = concat(axis = var_14475, interleave = input_421_interleave_0, values = (hidden_states_341_cast_fp16, var_14477_cast_fp16))[name = string("input_421_cast_fp16")]; + tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; + fp16 var_14472_to_fp16 = const()[name = string("op_14472_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_14472_to_fp16, x = input_421_cast_fp16)[name = string("normed_505_cast_fp16")]; + tensor normed_507_begin_0 = const()[name = string("normed_507_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_507_end_0 = const()[name = string("normed_507_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_507_end_mask_0 = const()[name = string("normed_507_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_507_cast_fp16 = slice_by_index(begin = normed_507_begin_0, end = normed_507_end_0, end_mask = normed_507_end_mask_0, x = normed_505_cast_fp16)[name = string("normed_507_cast_fp16")]; + tensor var_14491_to_fp16 = const()[name = string("op_14491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793999936)))]; + tensor hidden_states_343_cast_fp16 = mul(x = normed_507_cast_fp16, y = var_14491_to_fp16)[name = string("hidden_states_343_cast_fp16")]; + tensor var_14496 = const()[name = string("op_14496"), val = tensor([0, 2, 1])]; + tensor var_14499_axes_0 = const()[name = string("op_14499_axes_0"), val = tensor([2])]; + tensor var_14497_cast_fp16 = transpose(perm = var_14496, x = hidden_states_343_cast_fp16)[name = string("transpose_46")]; + tensor var_14499_cast_fp16 = expand_dims(axes = var_14499_axes_0, x = var_14497_cast_fp16)[name = string("op_14499_cast_fp16")]; + string var_14515_pad_type_0 = const()[name = string("op_14515_pad_type_0"), val = string("valid")]; + tensor var_14515_strides_0 = const()[name = string("op_14515_strides_0"), val = tensor([1, 1])]; + tensor var_14515_pad_0 = const()[name = string("op_14515_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14515_dilations_0 = const()[name = string("op_14515_dilations_0"), val = tensor([1, 1])]; + int32 var_14515_groups_0 = const()[name = string("op_14515_groups_0"), val = int32(1)]; + tensor var_14515 = conv(dilations = var_14515_dilations_0, groups = var_14515_groups_0, pad = var_14515_pad_0, pad_type = var_14515_pad_type_0, strides = var_14515_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_14499_cast_fp16)[name = string("op_14515")]; + tensor var_14520 = const()[name = string("op_14520"), val = tensor([1, 4, 1, 256])]; + tensor var_14521 = reshape(shape = var_14520, x = var_14515)[name = string("op_14521")]; + string var_14537_pad_type_0 = const()[name = string("op_14537_pad_type_0"), val = string("valid")]; + tensor var_14537_strides_0 = const()[name = string("op_14537_strides_0"), val = tensor([1, 1])]; + tensor var_14537_pad_0 = const()[name = string("op_14537_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14537_dilations_0 = const()[name = string("op_14537_dilations_0"), val = tensor([1, 1])]; + int32 var_14537_groups_0 = const()[name = string("op_14537_groups_0"), val = int32(1)]; + tensor var_14537 = conv(dilations = var_14537_dilations_0, groups = var_14537_groups_0, pad = var_14537_pad_0, pad_type = var_14537_pad_type_0, strides = var_14537_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_14499_cast_fp16)[name = string("op_14537")]; + tensor var_14542 = const()[name = string("op_14542"), val = tensor([1, 1, 1, 256])]; + tensor var_14543 = reshape(shape = var_14542, x = var_14537)[name = string("op_14543")]; + string var_14559_pad_type_0 = const()[name = string("op_14559_pad_type_0"), val = string("valid")]; + tensor var_14559_strides_0 = const()[name = string("op_14559_strides_0"), val = tensor([1, 1])]; + tensor var_14559_pad_0 = const()[name = string("op_14559_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14559_dilations_0 = const()[name = string("op_14559_dilations_0"), val = tensor([1, 1])]; + int32 var_14559_groups_0 = const()[name = string("op_14559_groups_0"), val = int32(1)]; + tensor var_14559 = conv(dilations = var_14559_dilations_0, groups = var_14559_groups_0, pad = var_14559_pad_0, pad_type = var_14559_pad_type_0, strides = var_14559_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_14499_cast_fp16)[name = string("op_14559")]; + tensor var_14564 = const()[name = string("op_14564"), val = tensor([1, 1, 1, 256])]; + tensor var_14565 = reshape(shape = var_14564, x = var_14559)[name = string("op_14565")]; + int32 var_14580 = const()[name = string("op_14580"), val = int32(-1)]; + fp16 const_802_promoted = const()[name = string("const_802_promoted"), val = fp16(-0x1p+0)]; + tensor var_14582 = mul(x = var_14521, y = const_802_promoted)[name = string("op_14582")]; + bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; + tensor input_425 = concat(axis = var_14580, interleave = input_425_interleave_0, values = (var_14521, var_14582))[name = string("input_425")]; + tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; + fp16 var_14577_to_fp16 = const()[name = string("op_14577_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_14577_to_fp16, x = input_425)[name = string("normed_509_cast_fp16")]; + tensor normed_511_begin_0 = const()[name = string("normed_511_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_511_end_0 = const()[name = string("normed_511_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_511_end_mask_0 = const()[name = string("normed_511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_511 = slice_by_index(begin = normed_511_begin_0, end = normed_511_end_0, end_mask = normed_511_end_mask_0, x = normed_509_cast_fp16)[name = string("normed_511")]; + tensor var_14596_to_fp16 = const()[name = string("op_14596_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794002304)))]; + tensor q_43_cast_fp16 = mul(x = normed_511, y = var_14596_to_fp16)[name = string("q_43_cast_fp16")]; + int32 var_14607 = const()[name = string("op_14607"), val = int32(-1)]; + fp16 const_806_promoted = const()[name = string("const_806_promoted"), val = fp16(-0x1p+0)]; + tensor var_14609 = mul(x = var_14543, y = const_806_promoted)[name = string("op_14609")]; + bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; + tensor input_427 = concat(axis = var_14607, interleave = input_427_interleave_0, values = (var_14543, var_14609))[name = string("input_427")]; + tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; + fp16 var_14604_to_fp16 = const()[name = string("op_14604_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_14604_to_fp16, x = input_427)[name = string("normed_513_cast_fp16")]; + tensor normed_515_begin_0 = const()[name = string("normed_515_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_515_end_0 = const()[name = string("normed_515_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_515_end_mask_0 = const()[name = string("normed_515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_515 = slice_by_index(begin = normed_515_begin_0, end = normed_515_end_0, end_mask = normed_515_end_mask_0, x = normed_513_cast_fp16)[name = string("normed_515")]; + tensor var_14623_to_fp16 = const()[name = string("op_14623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794002880)))]; + tensor k_43_cast_fp16 = mul(x = normed_515, y = var_14623_to_fp16)[name = string("k_43_cast_fp16")]; + tensor var_14625_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14625_cast_fp16")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; + fp16 const_812_promoted_to_fp16 = const()[name = string("const_812_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14646_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_812_promoted_to_fp16)[name = string("op_14646_cast_fp16")]; + int32 var_14648 = const()[name = string("op_14648"), val = int32(-1)]; + bool var_14649_interleave_0 = const()[name = string("op_14649_interleave_0"), val = bool(false)]; + tensor var_14649_cast_fp16 = concat(axis = var_14648, interleave = var_14649_interleave_0, values = (var_14646_cast_fp16, x1_85_cast_fp16))[name = string("op_14649_cast_fp16")]; + tensor var_14650_cast_fp16 = mul(x = var_14649_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14650_cast_fp16")]; + tensor query_states_85_cast_fp16 = add(x = var_14625_cast_fp16, y = var_14650_cast_fp16)[name = string("query_states_85_cast_fp16")]; + tensor var_14653_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14653_cast_fp16")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; + fp16 const_815_promoted_to_fp16 = const()[name = string("const_815_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14674_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_815_promoted_to_fp16)[name = string("op_14674_cast_fp16")]; + int32 var_14676 = const()[name = string("op_14676"), val = int32(-1)]; + bool var_14677_interleave_0 = const()[name = string("op_14677_interleave_0"), val = bool(false)]; + tensor var_14677_cast_fp16 = concat(axis = var_14676, interleave = var_14677_interleave_0, values = (var_14674_cast_fp16, x1_87_cast_fp16))[name = string("op_14677_cast_fp16")]; + tensor var_14678_cast_fp16 = mul(x = var_14677_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14678_cast_fp16")]; + tensor key_states_85_cast_fp16 = add(x = var_14653_cast_fp16, y = var_14678_cast_fp16)[name = string("key_states_85_cast_fp16")]; + tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([18])]; + tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; + tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; + tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([19])]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (expand_dims_252, expand_dims_253, current_pos, expand_dims_255))[name = string("concat_170")]; + tensor concat_171_values1_0 = const()[name = string("concat_171_values1_0"), val = tensor([0])]; + tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; + int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; + bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; + tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_256, concat_171_values1_0, var_1955, concat_171_values3_0))[name = string("concat_171")]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_170, begin_mask = model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0, end = concat_171, end_mask = model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_37_stride_0, update = key_states_85_cast_fp16, x = coreml_update_state_93)[name = string("model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_42_write_state")]; + tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_42")]; + tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([40])]; + tensor expand_dims_259 = const()[name = string("expand_dims_259"), val = tensor([0])]; + tensor expand_dims_261 = const()[name = string("expand_dims_261"), val = tensor([0])]; + tensor expand_dims_262 = const()[name = string("expand_dims_262"), val = tensor([41])]; + int32 concat_174_axis_0 = const()[name = string("concat_174_axis_0"), val = int32(0)]; + bool concat_174_interleave_0 = const()[name = string("concat_174_interleave_0"), val = bool(false)]; + tensor concat_174 = concat(axis = concat_174_axis_0, interleave = concat_174_interleave_0, values = (expand_dims_258, expand_dims_259, current_pos, expand_dims_261))[name = string("concat_174")]; + tensor concat_175_values1_0 = const()[name = string("concat_175_values1_0"), val = tensor([0])]; + tensor concat_175_values3_0 = const()[name = string("concat_175_values3_0"), val = tensor([0])]; + int32 concat_175_axis_0 = const()[name = string("concat_175_axis_0"), val = int32(0)]; + bool concat_175_interleave_0 = const()[name = string("concat_175_interleave_0"), val = bool(false)]; + tensor concat_175 = concat(axis = concat_175_axis_0, interleave = concat_175_interleave_0, values = (expand_dims_262, concat_175_values1_0, var_1955, concat_175_values3_0))[name = string("concat_175")]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_174, begin_mask = model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0, end = concat_175, end_mask = model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_38_stride_0, update = var_14565, x = coreml_update_state_94)[name = string("model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_43_write_state")]; + tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_43")]; + tensor var_14733_begin_0 = const()[name = string("op_14733_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_14733_end_0 = const()[name = string("op_14733_end_0"), val = tensor([19, 1, 512, 256])]; + tensor var_14733_end_mask_0 = const()[name = string("op_14733_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14733_cast_fp16 = slice_by_index(begin = var_14733_begin_0, end = var_14733_end_0, end_mask = var_14733_end_mask_0, x = coreml_update_state_95)[name = string("op_14733_cast_fp16")]; + tensor var_14740_begin_0 = const()[name = string("op_14740_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor var_14740_end_0 = const()[name = string("op_14740_end_0"), val = tensor([41, 1, 512, 256])]; + tensor var_14740_end_mask_0 = const()[name = string("op_14740_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14740_cast_fp16 = slice_by_index(begin = var_14740_begin_0, end = var_14740_end_0, end_mask = var_14740_end_mask_0, x = coreml_update_state_95)[name = string("op_14740_cast_fp16")]; + tensor var_14777 = const()[name = string("op_14777"), val = tensor([1, 4, 1, 1])]; + tensor x_341_cast_fp16 = tile(reps = var_14777, x = var_14733_cast_fp16)[name = string("x_341_cast_fp16")]; + tensor var_14797 = const()[name = string("op_14797"), val = tensor([1, 4, 1, 1])]; + tensor x_347_cast_fp16 = tile(reps = var_14797, x = var_14740_cast_fp16)[name = string("x_347_cast_fp16")]; + bool var_14824_transpose_x_1 = const()[name = string("op_14824_transpose_x_1"), val = bool(false)]; + bool var_14824_transpose_y_1 = const()[name = string("op_14824_transpose_y_1"), val = bool(true)]; + tensor var_14824 = matmul(transpose_x = var_14824_transpose_x_1, transpose_y = var_14824_transpose_y_1, x = query_states_85_cast_fp16, y = x_341_cast_fp16)[name = string("op_14824")]; + fp16 var_14825_to_fp16 = const()[name = string("op_14825_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_127_cast_fp16 = mul(x = var_14824, y = var_14825_to_fp16)[name = string("attn_weights_127_cast_fp16")]; + tensor attn_weights_129_cast_fp16 = add(x = attn_weights_127_cast_fp16, y = var_2129)[name = string("attn_weights_129_cast_fp16")]; + int32 var_14860 = const()[name = string("op_14860"), val = int32(-1)]; + tensor attn_weights_131_cast_fp16 = softmax(axis = var_14860, x = attn_weights_129_cast_fp16)[name = string("attn_weights_131_cast_fp16")]; + bool attn_output_211_transpose_x_0 = const()[name = string("attn_output_211_transpose_x_0"), val = bool(false)]; + bool attn_output_211_transpose_y_0 = const()[name = string("attn_output_211_transpose_y_0"), val = bool(false)]; + tensor attn_output_211_cast_fp16 = matmul(transpose_x = attn_output_211_transpose_x_0, transpose_y = attn_output_211_transpose_y_0, x = attn_weights_131_cast_fp16, y = x_347_cast_fp16)[name = string("attn_output_211_cast_fp16")]; + tensor var_14871_perm_0 = const()[name = string("op_14871_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14875 = const()[name = string("op_14875"), val = tensor([1, 1, 1024])]; + tensor var_14871_cast_fp16 = transpose(perm = var_14871_perm_0, x = attn_output_211_cast_fp16)[name = string("transpose_45")]; + tensor attn_output_215_cast_fp16 = reshape(shape = var_14875, x = var_14871_cast_fp16)[name = string("attn_output_215_cast_fp16")]; + tensor var_14880 = const()[name = string("op_14880"), val = tensor([0, 2, 1])]; + string var_14896_pad_type_0 = const()[name = string("op_14896_pad_type_0"), val = string("valid")]; + int32 var_14896_groups_0 = const()[name = string("op_14896_groups_0"), val = int32(1)]; + tensor var_14896_strides_0 = const()[name = string("op_14896_strides_0"), val = tensor([1])]; + tensor var_14896_pad_0 = const()[name = string("op_14896_pad_0"), val = tensor([0, 0])]; + tensor var_14896_dilations_0 = const()[name = string("op_14896_dilations_0"), val = tensor([1])]; + tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794003456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794888256))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14881_cast_fp16 = transpose(perm = var_14880, x = attn_output_215_cast_fp16)[name = string("transpose_44")]; + tensor var_14896_cast_fp16 = conv(dilations = var_14896_dilations_0, groups = var_14896_groups_0, pad = var_14896_pad_0, pad_type = var_14896_pad_type_0, strides = var_14896_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_14881_cast_fp16)[name = string("op_14896_cast_fp16")]; + tensor var_14900 = const()[name = string("op_14900"), val = tensor([0, 2, 1])]; + int32 var_14911 = const()[name = string("op_14911"), val = int32(-1)]; + fp16 const_824_promoted_to_fp16 = const()[name = string("const_824_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_349_cast_fp16 = transpose(perm = var_14900, x = var_14896_cast_fp16)[name = string("transpose_43")]; + tensor var_14913_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = const_824_promoted_to_fp16)[name = string("op_14913_cast_fp16")]; + bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; + tensor input_431_cast_fp16 = concat(axis = var_14911, interleave = input_431_interleave_0, values = (hidden_states_349_cast_fp16, var_14913_cast_fp16))[name = string("input_431_cast_fp16")]; + tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; + fp16 var_14908_to_fp16 = const()[name = string("op_14908_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_14908_to_fp16, x = input_431_cast_fp16)[name = string("normed_517_cast_fp16")]; + tensor normed_519_begin_0 = const()[name = string("normed_519_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_519_end_0 = const()[name = string("normed_519_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_519_end_mask_0 = const()[name = string("normed_519_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_519_cast_fp16 = slice_by_index(begin = normed_519_begin_0, end = normed_519_end_0, end_mask = normed_519_end_mask_0, x = normed_517_cast_fp16)[name = string("normed_519_cast_fp16")]; + tensor var_14927_to_fp16 = const()[name = string("op_14927_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794925184)))]; + tensor attn_output_219_cast_fp16 = mul(x = normed_519_cast_fp16, y = var_14927_to_fp16)[name = string("attn_output_219_cast_fp16")]; + tensor hidden_states_351_cast_fp16 = add(x = hidden_states_341_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_351_cast_fp16")]; + int32 var_14940 = const()[name = string("op_14940"), val = int32(-1)]; + fp16 const_828_promoted_to_fp16 = const()[name = string("const_828_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14942_cast_fp16 = mul(x = hidden_states_351_cast_fp16, y = const_828_promoted_to_fp16)[name = string("op_14942_cast_fp16")]; + bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; + tensor input_433_cast_fp16 = concat(axis = var_14940, interleave = input_433_interleave_0, values = (hidden_states_351_cast_fp16, var_14942_cast_fp16))[name = string("input_433_cast_fp16")]; + tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; + fp16 var_14937_to_fp16 = const()[name = string("op_14937_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_14937_to_fp16, x = input_433_cast_fp16)[name = string("normed_521_cast_fp16")]; + tensor normed_523_begin_0 = const()[name = string("normed_523_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_523_end_0 = const()[name = string("normed_523_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_523_end_mask_0 = const()[name = string("normed_523_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_523_cast_fp16 = slice_by_index(begin = normed_523_begin_0, end = normed_523_end_0, end_mask = normed_523_end_mask_0, x = normed_521_cast_fp16)[name = string("normed_523_cast_fp16")]; + tensor var_14956_to_fp16 = const()[name = string("op_14956_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794927552)))]; + tensor x_349_cast_fp16 = mul(x = normed_523_cast_fp16, y = var_14956_to_fp16)[name = string("x_349_cast_fp16")]; + tensor var_14968 = const()[name = string("op_14968"), val = tensor([0, 2, 1])]; + tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; + tensor var_14969_cast_fp16 = transpose(perm = var_14968, x = x_349_cast_fp16)[name = string("transpose_42")]; + tensor input_435_cast_fp16 = expand_dims(axes = input_435_axes_0, x = var_14969_cast_fp16)[name = string("input_435_cast_fp16")]; + string x_351_pad_type_0 = const()[name = string("x_351_pad_type_0"), val = string("valid")]; + tensor x_351_strides_0 = const()[name = string("x_351_strides_0"), val = tensor([1, 1])]; + tensor x_351_pad_0 = const()[name = string("x_351_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_351_dilations_0 = const()[name = string("x_351_dilations_0"), val = tensor([1, 1])]; + int32 x_351_groups_0 = const()[name = string("x_351_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794929920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800901952))))[name = string("model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("x_351_cast_fp16")]; + string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; + tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; + tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; + int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(801123200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807095232))))[name = string("model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_43_cast_fp16 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("b_43_cast_fp16")]; + string var_14994_mode_0 = const()[name = string("op_14994_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_14994_cast_fp16 = gelu(mode = var_14994_mode_0, x = x_351_cast_fp16)[name = string("op_14994_cast_fp16")]; + tensor input_437_cast_fp16 = mul(x = var_14994_cast_fp16, y = b_43_cast_fp16)[name = string("input_437_cast_fp16")]; + string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; + tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; + tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; + int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807316480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813288512))))[name = string("model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_43_cast_fp16 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_437_cast_fp16)[name = string("e_43_cast_fp16")]; + tensor var_15002_axes_0 = const()[name = string("op_15002_axes_0"), val = tensor([2])]; + tensor var_15002_cast_fp16 = squeeze(axes = var_15002_axes_0, x = e_43_cast_fp16)[name = string("op_15002_cast_fp16")]; + tensor var_15003 = const()[name = string("op_15003"), val = tensor([0, 2, 1])]; + int32 var_15014 = const()[name = string("op_15014"), val = int32(-1)]; + fp16 const_832_promoted_to_fp16 = const()[name = string("const_832_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_353_cast_fp16 = transpose(perm = var_15003, x = var_15002_cast_fp16)[name = string("transpose_41")]; + tensor var_15016_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = const_832_promoted_to_fp16)[name = string("op_15016_cast_fp16")]; + bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; + tensor input_439_cast_fp16 = concat(axis = var_15014, interleave = input_439_interleave_0, values = (hidden_states_353_cast_fp16, var_15016_cast_fp16))[name = string("input_439_cast_fp16")]; + tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; + fp16 var_15011_to_fp16 = const()[name = string("op_15011_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_15011_to_fp16, x = input_439_cast_fp16)[name = string("normed_525_cast_fp16")]; + tensor normed_527_begin_0 = const()[name = string("normed_527_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_527_end_0 = const()[name = string("normed_527_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_527_end_mask_0 = const()[name = string("normed_527_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_527_cast_fp16 = slice_by_index(begin = normed_527_begin_0, end = normed_527_end_0, end_mask = normed_527_end_mask_0, x = normed_525_cast_fp16)[name = string("normed_527_cast_fp16")]; + tensor var_15030_to_fp16 = const()[name = string("op_15030_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813325440)))]; + tensor hidden_states_355_cast_fp16 = mul(x = normed_527_cast_fp16, y = var_15030_to_fp16)[name = string("hidden_states_355_cast_fp16")]; + tensor hidden_states_357_cast_fp16 = add(x = hidden_states_351_cast_fp16, y = hidden_states_355_cast_fp16)[name = string("hidden_states_357_cast_fp16")]; + int32 var_15081 = const()[name = string("op_15081"), val = int32(-1)]; + fp16 const_836_promoted_to_fp16 = const()[name = string("const_836_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15083_cast_fp16 = mul(x = hidden_states_357_cast_fp16, y = const_836_promoted_to_fp16)[name = string("op_15083_cast_fp16")]; + bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; + tensor input_441_cast_fp16 = concat(axis = var_15081, interleave = input_441_interleave_0, values = (hidden_states_357_cast_fp16, var_15083_cast_fp16))[name = string("input_441_cast_fp16")]; + tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; + fp16 var_15078_to_fp16 = const()[name = string("op_15078_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_15078_to_fp16, x = input_441_cast_fp16)[name = string("normed_529_cast_fp16")]; + tensor normed_531_begin_0 = const()[name = string("normed_531_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_531_end_0 = const()[name = string("normed_531_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_531_end_mask_0 = const()[name = string("normed_531_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_531_cast_fp16 = slice_by_index(begin = normed_531_begin_0, end = normed_531_end_0, end_mask = normed_531_end_mask_0, x = normed_529_cast_fp16)[name = string("normed_531_cast_fp16")]; + tensor var_15097_to_fp16 = const()[name = string("op_15097_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813327808)))]; + tensor hidden_states_359_cast_fp16 = mul(x = normed_531_cast_fp16, y = var_15097_to_fp16)[name = string("hidden_states_359_cast_fp16")]; + tensor var_15102 = const()[name = string("op_15102"), val = tensor([0, 2, 1])]; + tensor var_15105_axes_0 = const()[name = string("op_15105_axes_0"), val = tensor([2])]; + tensor var_15103_cast_fp16 = transpose(perm = var_15102, x = hidden_states_359_cast_fp16)[name = string("transpose_40")]; + tensor var_15105_cast_fp16 = expand_dims(axes = var_15105_axes_0, x = var_15103_cast_fp16)[name = string("op_15105_cast_fp16")]; + string var_15121_pad_type_0 = const()[name = string("op_15121_pad_type_0"), val = string("valid")]; + tensor var_15121_strides_0 = const()[name = string("op_15121_strides_0"), val = tensor([1, 1])]; + tensor var_15121_pad_0 = const()[name = string("op_15121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15121_dilations_0 = const()[name = string("op_15121_dilations_0"), val = tensor([1, 1])]; + int32 var_15121_groups_0 = const()[name = string("op_15121_groups_0"), val = int32(1)]; + tensor var_15121 = conv(dilations = var_15121_dilations_0, groups = var_15121_groups_0, pad = var_15121_pad_0, pad_type = var_15121_pad_type_0, strides = var_15121_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_15105_cast_fp16)[name = string("op_15121")]; + tensor var_15126 = const()[name = string("op_15126"), val = tensor([1, 4, 1, 256])]; + tensor var_15127 = reshape(shape = var_15126, x = var_15121)[name = string("op_15127")]; + string var_15143_pad_type_0 = const()[name = string("op_15143_pad_type_0"), val = string("valid")]; + tensor var_15143_strides_0 = const()[name = string("op_15143_strides_0"), val = tensor([1, 1])]; + tensor var_15143_pad_0 = const()[name = string("op_15143_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15143_dilations_0 = const()[name = string("op_15143_dilations_0"), val = tensor([1, 1])]; + int32 var_15143_groups_0 = const()[name = string("op_15143_groups_0"), val = int32(1)]; + tensor var_15143 = conv(dilations = var_15143_dilations_0, groups = var_15143_groups_0, pad = var_15143_pad_0, pad_type = var_15143_pad_type_0, strides = var_15143_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_15105_cast_fp16)[name = string("op_15143")]; + tensor var_15148 = const()[name = string("op_15148"), val = tensor([1, 1, 1, 256])]; + tensor var_15149 = reshape(shape = var_15148, x = var_15143)[name = string("op_15149")]; + string var_15165_pad_type_0 = const()[name = string("op_15165_pad_type_0"), val = string("valid")]; + tensor var_15165_strides_0 = const()[name = string("op_15165_strides_0"), val = tensor([1, 1])]; + tensor var_15165_pad_0 = const()[name = string("op_15165_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15165_dilations_0 = const()[name = string("op_15165_dilations_0"), val = tensor([1, 1])]; + int32 var_15165_groups_0 = const()[name = string("op_15165_groups_0"), val = int32(1)]; + tensor var_15165 = conv(dilations = var_15165_dilations_0, groups = var_15165_groups_0, pad = var_15165_pad_0, pad_type = var_15165_pad_type_0, strides = var_15165_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_15105_cast_fp16)[name = string("op_15165")]; + tensor var_15170 = const()[name = string("op_15170"), val = tensor([1, 1, 1, 256])]; + tensor var_15171 = reshape(shape = var_15170, x = var_15165)[name = string("op_15171")]; + int32 var_15186 = const()[name = string("op_15186"), val = int32(-1)]; + fp16 const_840_promoted = const()[name = string("const_840_promoted"), val = fp16(-0x1p+0)]; + tensor var_15188 = mul(x = var_15127, y = const_840_promoted)[name = string("op_15188")]; + bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; + tensor input_445 = concat(axis = var_15186, interleave = input_445_interleave_0, values = (var_15127, var_15188))[name = string("input_445")]; + tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; + fp16 var_15183_to_fp16 = const()[name = string("op_15183_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_15183_to_fp16, x = input_445)[name = string("normed_533_cast_fp16")]; + tensor normed_535_begin_0 = const()[name = string("normed_535_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_535_end_0 = const()[name = string("normed_535_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_535_end_mask_0 = const()[name = string("normed_535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_535 = slice_by_index(begin = normed_535_begin_0, end = normed_535_end_0, end_mask = normed_535_end_mask_0, x = normed_533_cast_fp16)[name = string("normed_535")]; + tensor var_15202_to_fp16 = const()[name = string("op_15202_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813330176)))]; + tensor q_45_cast_fp16 = mul(x = normed_535, y = var_15202_to_fp16)[name = string("q_45_cast_fp16")]; + int32 var_15213 = const()[name = string("op_15213"), val = int32(-1)]; + fp16 const_844_promoted = const()[name = string("const_844_promoted"), val = fp16(-0x1p+0)]; + tensor var_15215 = mul(x = var_15149, y = const_844_promoted)[name = string("op_15215")]; + bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; + tensor input_447 = concat(axis = var_15213, interleave = input_447_interleave_0, values = (var_15149, var_15215))[name = string("input_447")]; + tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; + fp16 var_15210_to_fp16 = const()[name = string("op_15210_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_15210_to_fp16, x = input_447)[name = string("normed_537_cast_fp16")]; + tensor normed_539_begin_0 = const()[name = string("normed_539_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_539_end_0 = const()[name = string("normed_539_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_539_end_mask_0 = const()[name = string("normed_539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_539 = slice_by_index(begin = normed_539_begin_0, end = normed_539_end_0, end_mask = normed_539_end_mask_0, x = normed_537_cast_fp16)[name = string("normed_539")]; + tensor var_15229_to_fp16 = const()[name = string("op_15229_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813330752)))]; + tensor k_45_cast_fp16 = mul(x = normed_539, y = var_15229_to_fp16)[name = string("k_45_cast_fp16")]; + tensor var_15231_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_15231_cast_fp16")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; + fp16 const_850_promoted_to_fp16 = const()[name = string("const_850_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15252_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_850_promoted_to_fp16)[name = string("op_15252_cast_fp16")]; + int32 var_15254 = const()[name = string("op_15254"), val = int32(-1)]; + bool var_15255_interleave_0 = const()[name = string("op_15255_interleave_0"), val = bool(false)]; + tensor var_15255_cast_fp16 = concat(axis = var_15254, interleave = var_15255_interleave_0, values = (var_15252_cast_fp16, x1_89_cast_fp16))[name = string("op_15255_cast_fp16")]; + tensor var_15256_cast_fp16 = mul(x = var_15255_cast_fp16, y = sin_1_cast_fp16)[name = string("op_15256_cast_fp16")]; + tensor query_states_89_cast_fp16 = add(x = var_15231_cast_fp16, y = var_15256_cast_fp16)[name = string("query_states_89_cast_fp16")]; + tensor var_15259_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_15259_cast_fp16")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; + fp16 const_853_promoted_to_fp16 = const()[name = string("const_853_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15280_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_853_promoted_to_fp16)[name = string("op_15280_cast_fp16")]; + int32 var_15282 = const()[name = string("op_15282"), val = int32(-1)]; + bool var_15283_interleave_0 = const()[name = string("op_15283_interleave_0"), val = bool(false)]; + tensor var_15283_cast_fp16 = concat(axis = var_15282, interleave = var_15283_interleave_0, values = (var_15280_cast_fp16, x1_91_cast_fp16))[name = string("op_15283_cast_fp16")]; + tensor var_15284_cast_fp16 = mul(x = var_15283_cast_fp16, y = sin_1_cast_fp16)[name = string("op_15284_cast_fp16")]; + tensor key_states_89_cast_fp16 = add(x = var_15259_cast_fp16, y = var_15284_cast_fp16)[name = string("key_states_89_cast_fp16")]; + tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([19])]; + tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; + tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; + tensor expand_dims_268 = const()[name = string("expand_dims_268"), val = tensor([20])]; + int32 concat_178_axis_0 = const()[name = string("concat_178_axis_0"), val = int32(0)]; + bool concat_178_interleave_0 = const()[name = string("concat_178_interleave_0"), val = bool(false)]; + tensor concat_178 = concat(axis = concat_178_axis_0, interleave = concat_178_interleave_0, values = (expand_dims_264, expand_dims_265, current_pos, expand_dims_267))[name = string("concat_178")]; + tensor concat_179_values1_0 = const()[name = string("concat_179_values1_0"), val = tensor([0])]; + tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; + int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; + bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; + tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_268, concat_179_values1_0, var_1955, concat_179_values3_0))[name = string("concat_179")]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_178, begin_mask = model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0, end = concat_179, end_mask = model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_39_stride_0, update = key_states_89_cast_fp16, x = coreml_update_state_95)[name = string("model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_44_write_state")]; + tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_44")]; + tensor expand_dims_270 = const()[name = string("expand_dims_270"), val = tensor([41])]; + tensor expand_dims_271 = const()[name = string("expand_dims_271"), val = tensor([0])]; + tensor expand_dims_273 = const()[name = string("expand_dims_273"), val = tensor([0])]; + tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([42])]; + int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; + bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; + tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_270, expand_dims_271, current_pos, expand_dims_273))[name = string("concat_182")]; + tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; + tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; + int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; + bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; + tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_274, concat_183_values1_0, var_1955, concat_183_values3_0))[name = string("concat_183")]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_40_stride_0, update = var_15171, x = coreml_update_state_96)[name = string("model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_45_write_state")]; + tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_45")]; + tensor var_15339_begin_0 = const()[name = string("op_15339_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_15339_end_0 = const()[name = string("op_15339_end_0"), val = tensor([20, 1, 512, 256])]; + tensor var_15339_end_mask_0 = const()[name = string("op_15339_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15339_cast_fp16 = slice_by_index(begin = var_15339_begin_0, end = var_15339_end_0, end_mask = var_15339_end_mask_0, x = coreml_update_state_97)[name = string("op_15339_cast_fp16")]; + tensor var_15346_begin_0 = const()[name = string("op_15346_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor var_15346_end_0 = const()[name = string("op_15346_end_0"), val = tensor([42, 1, 512, 256])]; + tensor var_15346_end_mask_0 = const()[name = string("op_15346_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15346_cast_fp16 = slice_by_index(begin = var_15346_begin_0, end = var_15346_end_0, end_mask = var_15346_end_mask_0, x = coreml_update_state_97)[name = string("op_15346_cast_fp16")]; + tensor var_15383 = const()[name = string("op_15383"), val = tensor([1, 4, 1, 1])]; + tensor x_357_cast_fp16 = tile(reps = var_15383, x = var_15339_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_15403 = const()[name = string("op_15403"), val = tensor([1, 4, 1, 1])]; + tensor x_363_cast_fp16 = tile(reps = var_15403, x = var_15346_cast_fp16)[name = string("x_363_cast_fp16")]; + bool var_15430_transpose_x_1 = const()[name = string("op_15430_transpose_x_1"), val = bool(false)]; + bool var_15430_transpose_y_1 = const()[name = string("op_15430_transpose_y_1"), val = bool(true)]; + tensor var_15430 = matmul(transpose_x = var_15430_transpose_x_1, transpose_y = var_15430_transpose_y_1, x = query_states_89_cast_fp16, y = x_357_cast_fp16)[name = string("op_15430")]; + fp16 var_15431_to_fp16 = const()[name = string("op_15431_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_133_cast_fp16 = mul(x = var_15430, y = var_15431_to_fp16)[name = string("attn_weights_133_cast_fp16")]; + tensor attn_weights_135_cast_fp16 = add(x = attn_weights_133_cast_fp16, y = var_2129)[name = string("attn_weights_135_cast_fp16")]; + int32 var_15466 = const()[name = string("op_15466"), val = int32(-1)]; + tensor attn_weights_137_cast_fp16 = softmax(axis = var_15466, x = attn_weights_135_cast_fp16)[name = string("attn_weights_137_cast_fp16")]; + bool attn_output_221_transpose_x_0 = const()[name = string("attn_output_221_transpose_x_0"), val = bool(false)]; + bool attn_output_221_transpose_y_0 = const()[name = string("attn_output_221_transpose_y_0"), val = bool(false)]; + tensor attn_output_221_cast_fp16 = matmul(transpose_x = attn_output_221_transpose_x_0, transpose_y = attn_output_221_transpose_y_0, x = attn_weights_137_cast_fp16, y = x_363_cast_fp16)[name = string("attn_output_221_cast_fp16")]; + tensor var_15477_perm_0 = const()[name = string("op_15477_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_15481 = const()[name = string("op_15481"), val = tensor([1, 1, 1024])]; + tensor var_15477_cast_fp16 = transpose(perm = var_15477_perm_0, x = attn_output_221_cast_fp16)[name = string("transpose_39")]; + tensor attn_output_225_cast_fp16 = reshape(shape = var_15481, x = var_15477_cast_fp16)[name = string("attn_output_225_cast_fp16")]; + tensor var_15486 = const()[name = string("op_15486"), val = tensor([0, 2, 1])]; + string var_15502_pad_type_0 = const()[name = string("op_15502_pad_type_0"), val = string("valid")]; + int32 var_15502_groups_0 = const()[name = string("op_15502_groups_0"), val = int32(1)]; + tensor var_15502_strides_0 = const()[name = string("op_15502_strides_0"), val = tensor([1])]; + tensor var_15502_pad_0 = const()[name = string("op_15502_pad_0"), val = tensor([0, 0])]; + tensor var_15502_dilations_0 = const()[name = string("op_15502_dilations_0"), val = tensor([1])]; + tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813331328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814216128))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_15487_cast_fp16 = transpose(perm = var_15486, x = attn_output_225_cast_fp16)[name = string("transpose_38")]; + tensor var_15502_cast_fp16 = conv(dilations = var_15502_dilations_0, groups = var_15502_groups_0, pad = var_15502_pad_0, pad_type = var_15502_pad_type_0, strides = var_15502_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_15487_cast_fp16)[name = string("op_15502_cast_fp16")]; + tensor var_15506 = const()[name = string("op_15506"), val = tensor([0, 2, 1])]; + int32 var_15517 = const()[name = string("op_15517"), val = int32(-1)]; + fp16 const_862_promoted_to_fp16 = const()[name = string("const_862_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_365_cast_fp16 = transpose(perm = var_15506, x = var_15502_cast_fp16)[name = string("transpose_37")]; + tensor var_15519_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = const_862_promoted_to_fp16)[name = string("op_15519_cast_fp16")]; + bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; + tensor input_451_cast_fp16 = concat(axis = var_15517, interleave = input_451_interleave_0, values = (hidden_states_365_cast_fp16, var_15519_cast_fp16))[name = string("input_451_cast_fp16")]; + tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; + fp16 var_15514_to_fp16 = const()[name = string("op_15514_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_15514_to_fp16, x = input_451_cast_fp16)[name = string("normed_541_cast_fp16")]; + tensor normed_543_begin_0 = const()[name = string("normed_543_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_543_end_0 = const()[name = string("normed_543_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_543_end_mask_0 = const()[name = string("normed_543_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_543_cast_fp16 = slice_by_index(begin = normed_543_begin_0, end = normed_543_end_0, end_mask = normed_543_end_mask_0, x = normed_541_cast_fp16)[name = string("normed_543_cast_fp16")]; + tensor var_15533_to_fp16 = const()[name = string("op_15533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814253056)))]; + tensor attn_output_229_cast_fp16 = mul(x = normed_543_cast_fp16, y = var_15533_to_fp16)[name = string("attn_output_229_cast_fp16")]; + tensor hidden_states_367_cast_fp16 = add(x = hidden_states_357_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + int32 var_15546 = const()[name = string("op_15546"), val = int32(-1)]; + fp16 const_866_promoted_to_fp16 = const()[name = string("const_866_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15548_cast_fp16 = mul(x = hidden_states_367_cast_fp16, y = const_866_promoted_to_fp16)[name = string("op_15548_cast_fp16")]; + bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; + tensor input_453_cast_fp16 = concat(axis = var_15546, interleave = input_453_interleave_0, values = (hidden_states_367_cast_fp16, var_15548_cast_fp16))[name = string("input_453_cast_fp16")]; + tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; + fp16 var_15543_to_fp16 = const()[name = string("op_15543_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_15543_to_fp16, x = input_453_cast_fp16)[name = string("normed_545_cast_fp16")]; + tensor normed_547_begin_0 = const()[name = string("normed_547_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_547_end_0 = const()[name = string("normed_547_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_547_end_mask_0 = const()[name = string("normed_547_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_547_cast_fp16 = slice_by_index(begin = normed_547_begin_0, end = normed_547_end_0, end_mask = normed_547_end_mask_0, x = normed_545_cast_fp16)[name = string("normed_547_cast_fp16")]; + tensor var_15562_to_fp16 = const()[name = string("op_15562_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814255424)))]; + tensor x_365_cast_fp16 = mul(x = normed_547_cast_fp16, y = var_15562_to_fp16)[name = string("x_365_cast_fp16")]; + tensor var_15574 = const()[name = string("op_15574"), val = tensor([0, 2, 1])]; + tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; + tensor var_15575_cast_fp16 = transpose(perm = var_15574, x = x_365_cast_fp16)[name = string("transpose_36")]; + tensor input_455_cast_fp16 = expand_dims(axes = input_455_axes_0, x = var_15575_cast_fp16)[name = string("input_455_cast_fp16")]; + string x_367_pad_type_0 = const()[name = string("x_367_pad_type_0"), val = string("valid")]; + tensor x_367_strides_0 = const()[name = string("x_367_strides_0"), val = tensor([1, 1])]; + tensor x_367_pad_0 = const()[name = string("x_367_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_367_dilations_0 = const()[name = string("x_367_dilations_0"), val = tensor([1, 1])]; + int32 x_367_groups_0 = const()[name = string("x_367_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814257792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820229824))))[name = string("model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("x_367_cast_fp16")]; + string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; + tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; + tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; + int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820451072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826423104))))[name = string("model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_45_cast_fp16 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("b_45_cast_fp16")]; + string var_15600_mode_0 = const()[name = string("op_15600_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_15600_cast_fp16 = gelu(mode = var_15600_mode_0, x = x_367_cast_fp16)[name = string("op_15600_cast_fp16")]; + tensor input_457_cast_fp16 = mul(x = var_15600_cast_fp16, y = b_45_cast_fp16)[name = string("input_457_cast_fp16")]; + string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; + tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; + tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; + int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826644352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832616384))))[name = string("model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_45_cast_fp16 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_457_cast_fp16)[name = string("e_45_cast_fp16")]; + tensor var_15608_axes_0 = const()[name = string("op_15608_axes_0"), val = tensor([2])]; + tensor var_15608_cast_fp16 = squeeze(axes = var_15608_axes_0, x = e_45_cast_fp16)[name = string("op_15608_cast_fp16")]; + tensor var_15609 = const()[name = string("op_15609"), val = tensor([0, 2, 1])]; + int32 var_15620 = const()[name = string("op_15620"), val = int32(-1)]; + fp16 const_870_promoted_to_fp16 = const()[name = string("const_870_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_369_cast_fp16 = transpose(perm = var_15609, x = var_15608_cast_fp16)[name = string("transpose_35")]; + tensor var_15622_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = const_870_promoted_to_fp16)[name = string("op_15622_cast_fp16")]; + bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; + tensor input_459_cast_fp16 = concat(axis = var_15620, interleave = input_459_interleave_0, values = (hidden_states_369_cast_fp16, var_15622_cast_fp16))[name = string("input_459_cast_fp16")]; + tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; + fp16 var_15617_to_fp16 = const()[name = string("op_15617_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_15617_to_fp16, x = input_459_cast_fp16)[name = string("normed_549_cast_fp16")]; + tensor normed_551_begin_0 = const()[name = string("normed_551_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_551_end_0 = const()[name = string("normed_551_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_551_end_mask_0 = const()[name = string("normed_551_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_551_cast_fp16 = slice_by_index(begin = normed_551_begin_0, end = normed_551_end_0, end_mask = normed_551_end_mask_0, x = normed_549_cast_fp16)[name = string("normed_551_cast_fp16")]; + tensor var_15636_to_fp16 = const()[name = string("op_15636_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832653312)))]; + tensor hidden_states_371_cast_fp16 = mul(x = normed_551_cast_fp16, y = var_15636_to_fp16)[name = string("hidden_states_371_cast_fp16")]; + tensor hidden_states_373_cast_fp16 = add(x = hidden_states_367_cast_fp16, y = hidden_states_371_cast_fp16)[name = string("hidden_states_373_cast_fp16")]; + int32 var_15687 = const()[name = string("op_15687"), val = int32(-1)]; + fp16 const_874_promoted_to_fp16 = const()[name = string("const_874_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15689_cast_fp16 = mul(x = hidden_states_373_cast_fp16, y = const_874_promoted_to_fp16)[name = string("op_15689_cast_fp16")]; + bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; + tensor input_461_cast_fp16 = concat(axis = var_15687, interleave = input_461_interleave_0, values = (hidden_states_373_cast_fp16, var_15689_cast_fp16))[name = string("input_461_cast_fp16")]; + tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; + fp16 var_15684_to_fp16 = const()[name = string("op_15684_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_15684_to_fp16, x = input_461_cast_fp16)[name = string("normed_553_cast_fp16")]; + tensor normed_555_begin_0 = const()[name = string("normed_555_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_555_end_0 = const()[name = string("normed_555_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_555_end_mask_0 = const()[name = string("normed_555_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_555_cast_fp16 = slice_by_index(begin = normed_555_begin_0, end = normed_555_end_0, end_mask = normed_555_end_mask_0, x = normed_553_cast_fp16)[name = string("normed_555_cast_fp16")]; + tensor var_15703_to_fp16 = const()[name = string("op_15703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832655680)))]; + tensor hidden_states_375_cast_fp16 = mul(x = normed_555_cast_fp16, y = var_15703_to_fp16)[name = string("hidden_states_375_cast_fp16")]; + tensor var_15708 = const()[name = string("op_15708"), val = tensor([0, 2, 1])]; + tensor var_15711_axes_0 = const()[name = string("op_15711_axes_0"), val = tensor([2])]; + tensor var_15709_cast_fp16 = transpose(perm = var_15708, x = hidden_states_375_cast_fp16)[name = string("transpose_34")]; + tensor var_15711_cast_fp16 = expand_dims(axes = var_15711_axes_0, x = var_15709_cast_fp16)[name = string("op_15711_cast_fp16")]; + string var_15727_pad_type_0 = const()[name = string("op_15727_pad_type_0"), val = string("valid")]; + tensor var_15727_strides_0 = const()[name = string("op_15727_strides_0"), val = tensor([1, 1])]; + tensor var_15727_pad_0 = const()[name = string("op_15727_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15727_dilations_0 = const()[name = string("op_15727_dilations_0"), val = tensor([1, 1])]; + int32 var_15727_groups_0 = const()[name = string("op_15727_groups_0"), val = int32(1)]; + tensor var_15727 = conv(dilations = var_15727_dilations_0, groups = var_15727_groups_0, pad = var_15727_pad_0, pad_type = var_15727_pad_type_0, strides = var_15727_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_15711_cast_fp16)[name = string("op_15727")]; + tensor var_15732 = const()[name = string("op_15732"), val = tensor([1, 4, 1, 256])]; + tensor var_15733 = reshape(shape = var_15732, x = var_15727)[name = string("op_15733")]; + string var_15749_pad_type_0 = const()[name = string("op_15749_pad_type_0"), val = string("valid")]; + tensor var_15749_strides_0 = const()[name = string("op_15749_strides_0"), val = tensor([1, 1])]; + tensor var_15749_pad_0 = const()[name = string("op_15749_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15749_dilations_0 = const()[name = string("op_15749_dilations_0"), val = tensor([1, 1])]; + int32 var_15749_groups_0 = const()[name = string("op_15749_groups_0"), val = int32(1)]; + tensor var_15749 = conv(dilations = var_15749_dilations_0, groups = var_15749_groups_0, pad = var_15749_pad_0, pad_type = var_15749_pad_type_0, strides = var_15749_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_15711_cast_fp16)[name = string("op_15749")]; + tensor var_15754 = const()[name = string("op_15754"), val = tensor([1, 1, 1, 256])]; + tensor var_15755 = reshape(shape = var_15754, x = var_15749)[name = string("op_15755")]; + string var_15771_pad_type_0 = const()[name = string("op_15771_pad_type_0"), val = string("valid")]; + tensor var_15771_strides_0 = const()[name = string("op_15771_strides_0"), val = tensor([1, 1])]; + tensor var_15771_pad_0 = const()[name = string("op_15771_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15771_dilations_0 = const()[name = string("op_15771_dilations_0"), val = tensor([1, 1])]; + int32 var_15771_groups_0 = const()[name = string("op_15771_groups_0"), val = int32(1)]; + tensor var_15771 = conv(dilations = var_15771_dilations_0, groups = var_15771_groups_0, pad = var_15771_pad_0, pad_type = var_15771_pad_type_0, strides = var_15771_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_15711_cast_fp16)[name = string("op_15771")]; + tensor var_15776 = const()[name = string("op_15776"), val = tensor([1, 1, 1, 256])]; + tensor var_15777 = reshape(shape = var_15776, x = var_15771)[name = string("op_15777")]; + int32 var_15792 = const()[name = string("op_15792"), val = int32(-1)]; + fp16 const_878_promoted = const()[name = string("const_878_promoted"), val = fp16(-0x1p+0)]; + tensor var_15794 = mul(x = var_15733, y = const_878_promoted)[name = string("op_15794")]; + bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; + tensor input_465 = concat(axis = var_15792, interleave = input_465_interleave_0, values = (var_15733, var_15794))[name = string("input_465")]; + tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; + fp16 var_15789_to_fp16 = const()[name = string("op_15789_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_15789_to_fp16, x = input_465)[name = string("normed_557_cast_fp16")]; + tensor normed_559_begin_0 = const()[name = string("normed_559_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_559_end_0 = const()[name = string("normed_559_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_559_end_mask_0 = const()[name = string("normed_559_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_559 = slice_by_index(begin = normed_559_begin_0, end = normed_559_end_0, end_mask = normed_559_end_mask_0, x = normed_557_cast_fp16)[name = string("normed_559")]; + tensor var_15808_to_fp16 = const()[name = string("op_15808_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832658048)))]; + tensor q_47_cast_fp16 = mul(x = normed_559, y = var_15808_to_fp16)[name = string("q_47_cast_fp16")]; + int32 var_15819 = const()[name = string("op_15819"), val = int32(-1)]; + fp16 const_882_promoted = const()[name = string("const_882_promoted"), val = fp16(-0x1p+0)]; + tensor var_15821 = mul(x = var_15755, y = const_882_promoted)[name = string("op_15821")]; + bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; + tensor input_467 = concat(axis = var_15819, interleave = input_467_interleave_0, values = (var_15755, var_15821))[name = string("input_467")]; + tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; + fp16 var_15816_to_fp16 = const()[name = string("op_15816_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_15816_to_fp16, x = input_467)[name = string("normed_561_cast_fp16")]; + tensor normed_563_begin_0 = const()[name = string("normed_563_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_563_end_0 = const()[name = string("normed_563_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_563_end_mask_0 = const()[name = string("normed_563_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_563 = slice_by_index(begin = normed_563_begin_0, end = normed_563_end_0, end_mask = normed_563_end_mask_0, x = normed_561_cast_fp16)[name = string("normed_563")]; + tensor var_15835_to_fp16 = const()[name = string("op_15835_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832658624)))]; + tensor k_47_cast_fp16 = mul(x = normed_563, y = var_15835_to_fp16)[name = string("k_47_cast_fp16")]; + tensor var_15837_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_21_cast_fp16)[name = string("op_15837_cast_fp16")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; + fp16 const_888_promoted_to_fp16 = const()[name = string("const_888_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15858_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_888_promoted_to_fp16)[name = string("op_15858_cast_fp16")]; + int32 var_15860 = const()[name = string("op_15860"), val = int32(-1)]; + bool var_15861_interleave_0 = const()[name = string("op_15861_interleave_0"), val = bool(false)]; + tensor var_15861_cast_fp16 = concat(axis = var_15860, interleave = var_15861_interleave_0, values = (var_15858_cast_fp16, x1_93_cast_fp16))[name = string("op_15861_cast_fp16")]; + tensor var_15862_cast_fp16 = mul(x = var_15861_cast_fp16, y = sin_21_cast_fp16)[name = string("op_15862_cast_fp16")]; + tensor query_states_93_cast_fp16 = add(x = var_15837_cast_fp16, y = var_15862_cast_fp16)[name = string("query_states_93_cast_fp16")]; + tensor var_15865_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_21_cast_fp16)[name = string("op_15865_cast_fp16")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; + fp16 const_891_promoted_to_fp16 = const()[name = string("const_891_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15886_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_891_promoted_to_fp16)[name = string("op_15886_cast_fp16")]; + int32 var_15888 = const()[name = string("op_15888"), val = int32(-1)]; + bool var_15889_interleave_0 = const()[name = string("op_15889_interleave_0"), val = bool(false)]; + tensor var_15889_cast_fp16 = concat(axis = var_15888, interleave = var_15889_interleave_0, values = (var_15886_cast_fp16, x1_95_cast_fp16))[name = string("op_15889_cast_fp16")]; + tensor var_15890_cast_fp16 = mul(x = var_15889_cast_fp16, y = sin_21_cast_fp16)[name = string("op_15890_cast_fp16")]; + tensor key_states_93_cast_fp16 = add(x = var_15865_cast_fp16, y = var_15890_cast_fp16)[name = string("key_states_93_cast_fp16")]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_7_stride_0, update = key_states_93_cast_fp16, x = coreml_update_state_87)[name = string("model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_46_write_state")]; + tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_46")]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_8_stride_0, update = var_15777, x = coreml_update_state_98)[name = string("model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_47_write_state")]; + tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_47")]; + tensor var_15945_begin_0 = const()[name = string("op_15945_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_15945_end_0 = const()[name = string("op_15945_end_0"), val = tensor([4, 1, 4096, 256])]; + tensor var_15945_end_mask_0 = const()[name = string("op_15945_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15945_cast_fp16 = slice_by_index(begin = var_15945_begin_0, end = var_15945_end_0, end_mask = var_15945_end_mask_0, x = coreml_update_state_99)[name = string("op_15945_cast_fp16")]; + tensor var_15952_begin_0 = const()[name = string("op_15952_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_15952_end_0 = const()[name = string("op_15952_end_0"), val = tensor([1, 1, 4096, 256])]; + tensor var_15952_end_mask_0 = const()[name = string("op_15952_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_15952_cast_fp16 = slice_by_index(begin = var_15952_begin_0, end = var_15952_end_0, end_mask = var_15952_end_mask_0, x = coreml_update_state_99)[name = string("op_15952_cast_fp16")]; + tensor var_15989 = const()[name = string("op_15989"), val = tensor([1, 4, 1, 1])]; + tensor x_373_cast_fp16 = tile(reps = var_15989, x = var_15945_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_16009 = const()[name = string("op_16009"), val = tensor([1, 4, 1, 1])]; + tensor x_379_cast_fp16 = tile(reps = var_16009, x = var_15952_cast_fp16)[name = string("x_379_cast_fp16")]; + bool var_16036_transpose_x_1 = const()[name = string("op_16036_transpose_x_1"), val = bool(false)]; + bool var_16036_transpose_y_1 = const()[name = string("op_16036_transpose_y_1"), val = bool(true)]; + tensor var_16036 = matmul(transpose_x = var_16036_transpose_x_1, transpose_y = var_16036_transpose_y_1, x = query_states_93_cast_fp16, y = x_373_cast_fp16)[name = string("op_16036")]; + fp16 var_16037_to_fp16 = const()[name = string("op_16037_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_139_cast_fp16 = mul(x = var_16036, y = var_16037_to_fp16)[name = string("attn_weights_139_cast_fp16")]; + tensor attn_weights_141_cast_fp16 = add(x = attn_weights_139_cast_fp16, y = causal_mask)[name = string("attn_weights_141_cast_fp16")]; + int32 var_16072 = const()[name = string("op_16072"), val = int32(-1)]; + tensor attn_weights_143_cast_fp16 = softmax(axis = var_16072, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; + bool attn_output_231_transpose_x_0 = const()[name = string("attn_output_231_transpose_x_0"), val = bool(false)]; + bool attn_output_231_transpose_y_0 = const()[name = string("attn_output_231_transpose_y_0"), val = bool(false)]; + tensor attn_output_231_cast_fp16 = matmul(transpose_x = attn_output_231_transpose_x_0, transpose_y = attn_output_231_transpose_y_0, x = attn_weights_143_cast_fp16, y = x_379_cast_fp16)[name = string("attn_output_231_cast_fp16")]; + tensor var_16083_perm_0 = const()[name = string("op_16083_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_16087 = const()[name = string("op_16087"), val = tensor([1, 1, 1024])]; + tensor var_16083_cast_fp16 = transpose(perm = var_16083_perm_0, x = attn_output_231_cast_fp16)[name = string("transpose_33")]; + tensor attn_output_235_cast_fp16 = reshape(shape = var_16087, x = var_16083_cast_fp16)[name = string("attn_output_235_cast_fp16")]; + tensor var_16092 = const()[name = string("op_16092"), val = tensor([0, 2, 1])]; + string var_16108_pad_type_0 = const()[name = string("op_16108_pad_type_0"), val = string("valid")]; + int32 var_16108_groups_0 = const()[name = string("op_16108_groups_0"), val = int32(1)]; + tensor var_16108_strides_0 = const()[name = string("op_16108_strides_0"), val = tensor([1])]; + tensor var_16108_pad_0 = const()[name = string("op_16108_pad_0"), val = tensor([0, 0])]; + tensor var_16108_dilations_0 = const()[name = string("op_16108_dilations_0"), val = tensor([1])]; + tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832659200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833544000))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_16093_cast_fp16 = transpose(perm = var_16092, x = attn_output_235_cast_fp16)[name = string("transpose_32")]; + tensor var_16108_cast_fp16 = conv(dilations = var_16108_dilations_0, groups = var_16108_groups_0, pad = var_16108_pad_0, pad_type = var_16108_pad_type_0, strides = var_16108_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_16093_cast_fp16)[name = string("op_16108_cast_fp16")]; + tensor var_16112 = const()[name = string("op_16112"), val = tensor([0, 2, 1])]; + int32 var_16123 = const()[name = string("op_16123"), val = int32(-1)]; + fp16 const_900_promoted_to_fp16 = const()[name = string("const_900_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_381_cast_fp16 = transpose(perm = var_16112, x = var_16108_cast_fp16)[name = string("transpose_31")]; + tensor var_16125_cast_fp16 = mul(x = hidden_states_381_cast_fp16, y = const_900_promoted_to_fp16)[name = string("op_16125_cast_fp16")]; + bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; + tensor input_471_cast_fp16 = concat(axis = var_16123, interleave = input_471_interleave_0, values = (hidden_states_381_cast_fp16, var_16125_cast_fp16))[name = string("input_471_cast_fp16")]; + tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; + fp16 var_16120_to_fp16 = const()[name = string("op_16120_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_16120_to_fp16, x = input_471_cast_fp16)[name = string("normed_565_cast_fp16")]; + tensor normed_567_begin_0 = const()[name = string("normed_567_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_567_end_0 = const()[name = string("normed_567_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_567_end_mask_0 = const()[name = string("normed_567_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_567_cast_fp16 = slice_by_index(begin = normed_567_begin_0, end = normed_567_end_0, end_mask = normed_567_end_mask_0, x = normed_565_cast_fp16)[name = string("normed_567_cast_fp16")]; + tensor var_16139_to_fp16 = const()[name = string("op_16139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833580928)))]; + tensor attn_output_239_cast_fp16 = mul(x = normed_567_cast_fp16, y = var_16139_to_fp16)[name = string("attn_output_239_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = add(x = hidden_states_373_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + int32 var_16152 = const()[name = string("op_16152"), val = int32(-1)]; + fp16 const_904_promoted_to_fp16 = const()[name = string("const_904_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16154_cast_fp16 = mul(x = hidden_states_383_cast_fp16, y = const_904_promoted_to_fp16)[name = string("op_16154_cast_fp16")]; + bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; + tensor input_473_cast_fp16 = concat(axis = var_16152, interleave = input_473_interleave_0, values = (hidden_states_383_cast_fp16, var_16154_cast_fp16))[name = string("input_473_cast_fp16")]; + tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; + fp16 var_16149_to_fp16 = const()[name = string("op_16149_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_16149_to_fp16, x = input_473_cast_fp16)[name = string("normed_569_cast_fp16")]; + tensor normed_571_begin_0 = const()[name = string("normed_571_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_571_end_0 = const()[name = string("normed_571_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_571_end_mask_0 = const()[name = string("normed_571_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_571_cast_fp16 = slice_by_index(begin = normed_571_begin_0, end = normed_571_end_0, end_mask = normed_571_end_mask_0, x = normed_569_cast_fp16)[name = string("normed_571_cast_fp16")]; + tensor var_16168_to_fp16 = const()[name = string("op_16168_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833583296)))]; + tensor x_381_cast_fp16 = mul(x = normed_571_cast_fp16, y = var_16168_to_fp16)[name = string("x_381_cast_fp16")]; + tensor var_16180 = const()[name = string("op_16180"), val = tensor([0, 2, 1])]; + tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; + tensor var_16181_cast_fp16 = transpose(perm = var_16180, x = x_381_cast_fp16)[name = string("transpose_30")]; + tensor input_475_cast_fp16 = expand_dims(axes = input_475_axes_0, x = var_16181_cast_fp16)[name = string("input_475_cast_fp16")]; + string x_383_pad_type_0 = const()[name = string("x_383_pad_type_0"), val = string("valid")]; + tensor x_383_strides_0 = const()[name = string("x_383_strides_0"), val = tensor([1, 1])]; + tensor x_383_pad_0 = const()[name = string("x_383_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_383_dilations_0 = const()[name = string("x_383_dilations_0"), val = tensor([1, 1])]; + int32 x_383_groups_0 = const()[name = string("x_383_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833585664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839557696))))[name = string("model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_383_cast_fp16 = conv(dilations = x_383_dilations_0, groups = x_383_groups_0, pad = x_383_pad_0, pad_type = x_383_pad_type_0, strides = x_383_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("x_383_cast_fp16")]; + string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; + tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; + tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; + int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839778944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845750976))))[name = string("model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_47_cast_fp16 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("b_47_cast_fp16")]; + string var_16206_mode_0 = const()[name = string("op_16206_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_16206_cast_fp16 = gelu(mode = var_16206_mode_0, x = x_383_cast_fp16)[name = string("op_16206_cast_fp16")]; + tensor input_477_cast_fp16 = mul(x = var_16206_cast_fp16, y = b_47_cast_fp16)[name = string("input_477_cast_fp16")]; + string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; + tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; + tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; + int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845972224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851944256))))[name = string("model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_47_cast_fp16 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_477_cast_fp16)[name = string("e_47_cast_fp16")]; + tensor var_16214_axes_0 = const()[name = string("op_16214_axes_0"), val = tensor([2])]; + tensor var_16214_cast_fp16 = squeeze(axes = var_16214_axes_0, x = e_47_cast_fp16)[name = string("op_16214_cast_fp16")]; + tensor var_16215 = const()[name = string("op_16215"), val = tensor([0, 2, 1])]; + int32 var_16226 = const()[name = string("op_16226"), val = int32(-1)]; + fp16 const_908_promoted_to_fp16 = const()[name = string("const_908_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_385_cast_fp16 = transpose(perm = var_16215, x = var_16214_cast_fp16)[name = string("transpose_29")]; + tensor var_16228_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = const_908_promoted_to_fp16)[name = string("op_16228_cast_fp16")]; + bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; + tensor input_479_cast_fp16 = concat(axis = var_16226, interleave = input_479_interleave_0, values = (hidden_states_385_cast_fp16, var_16228_cast_fp16))[name = string("input_479_cast_fp16")]; + tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; + fp16 var_16223_to_fp16 = const()[name = string("op_16223_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_16223_to_fp16, x = input_479_cast_fp16)[name = string("normed_573_cast_fp16")]; + tensor normed_575_begin_0 = const()[name = string("normed_575_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_575_end_0 = const()[name = string("normed_575_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_575_end_mask_0 = const()[name = string("normed_575_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_575_cast_fp16 = slice_by_index(begin = normed_575_begin_0, end = normed_575_end_0, end_mask = normed_575_end_mask_0, x = normed_573_cast_fp16)[name = string("normed_575_cast_fp16")]; + tensor var_16242_to_fp16 = const()[name = string("op_16242_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851981184)))]; + tensor hidden_states_387_cast_fp16 = mul(x = normed_575_cast_fp16, y = var_16242_to_fp16)[name = string("hidden_states_387_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_383_cast_fp16, y = hidden_states_387_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + int32 var_16293 = const()[name = string("op_16293"), val = int32(-1)]; + fp16 const_912_promoted_to_fp16 = const()[name = string("const_912_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16295_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = const_912_promoted_to_fp16)[name = string("op_16295_cast_fp16")]; + bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; + tensor input_481_cast_fp16 = concat(axis = var_16293, interleave = input_481_interleave_0, values = (hidden_states_389_cast_fp16, var_16295_cast_fp16))[name = string("input_481_cast_fp16")]; + tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; + fp16 var_16290_to_fp16 = const()[name = string("op_16290_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_16290_to_fp16, x = input_481_cast_fp16)[name = string("normed_577_cast_fp16")]; + tensor normed_579_begin_0 = const()[name = string("normed_579_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_579_end_0 = const()[name = string("normed_579_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_579_end_mask_0 = const()[name = string("normed_579_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_579_cast_fp16 = slice_by_index(begin = normed_579_begin_0, end = normed_579_end_0, end_mask = normed_579_end_mask_0, x = normed_577_cast_fp16)[name = string("normed_579_cast_fp16")]; + tensor var_16309_to_fp16 = const()[name = string("op_16309_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851983552)))]; + tensor hidden_states_391_cast_fp16 = mul(x = normed_579_cast_fp16, y = var_16309_to_fp16)[name = string("hidden_states_391_cast_fp16")]; + tensor var_16314 = const()[name = string("op_16314"), val = tensor([0, 2, 1])]; + tensor var_16317_axes_0 = const()[name = string("op_16317_axes_0"), val = tensor([2])]; + tensor var_16315_cast_fp16 = transpose(perm = var_16314, x = hidden_states_391_cast_fp16)[name = string("transpose_28")]; + tensor var_16317_cast_fp16 = expand_dims(axes = var_16317_axes_0, x = var_16315_cast_fp16)[name = string("op_16317_cast_fp16")]; + string var_16333_pad_type_0 = const()[name = string("op_16333_pad_type_0"), val = string("valid")]; + tensor var_16333_strides_0 = const()[name = string("op_16333_strides_0"), val = tensor([1, 1])]; + tensor var_16333_pad_0 = const()[name = string("op_16333_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16333_dilations_0 = const()[name = string("op_16333_dilations_0"), val = tensor([1, 1])]; + int32 var_16333_groups_0 = const()[name = string("op_16333_groups_0"), val = int32(1)]; + tensor var_16333 = conv(dilations = var_16333_dilations_0, groups = var_16333_groups_0, pad = var_16333_pad_0, pad_type = var_16333_pad_type_0, strides = var_16333_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_16317_cast_fp16)[name = string("op_16333")]; + tensor var_16338 = const()[name = string("op_16338"), val = tensor([1, 4, 1, 256])]; + tensor var_16339 = reshape(shape = var_16338, x = var_16333)[name = string("op_16339")]; + string var_16355_pad_type_0 = const()[name = string("op_16355_pad_type_0"), val = string("valid")]; + tensor var_16355_strides_0 = const()[name = string("op_16355_strides_0"), val = tensor([1, 1])]; + tensor var_16355_pad_0 = const()[name = string("op_16355_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16355_dilations_0 = const()[name = string("op_16355_dilations_0"), val = tensor([1, 1])]; + int32 var_16355_groups_0 = const()[name = string("op_16355_groups_0"), val = int32(1)]; + tensor var_16355 = conv(dilations = var_16355_dilations_0, groups = var_16355_groups_0, pad = var_16355_pad_0, pad_type = var_16355_pad_type_0, strides = var_16355_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_16317_cast_fp16)[name = string("op_16355")]; + tensor var_16360 = const()[name = string("op_16360"), val = tensor([1, 1, 1, 256])]; + tensor var_16361 = reshape(shape = var_16360, x = var_16355)[name = string("op_16361")]; + string var_16377_pad_type_0 = const()[name = string("op_16377_pad_type_0"), val = string("valid")]; + tensor var_16377_strides_0 = const()[name = string("op_16377_strides_0"), val = tensor([1, 1])]; + tensor var_16377_pad_0 = const()[name = string("op_16377_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16377_dilations_0 = const()[name = string("op_16377_dilations_0"), val = tensor([1, 1])]; + int32 var_16377_groups_0 = const()[name = string("op_16377_groups_0"), val = int32(1)]; + tensor var_16377 = conv(dilations = var_16377_dilations_0, groups = var_16377_groups_0, pad = var_16377_pad_0, pad_type = var_16377_pad_type_0, strides = var_16377_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_16317_cast_fp16)[name = string("op_16377")]; + tensor var_16382 = const()[name = string("op_16382"), val = tensor([1, 1, 1, 256])]; + tensor var_16383 = reshape(shape = var_16382, x = var_16377)[name = string("op_16383")]; + int32 var_16398 = const()[name = string("op_16398"), val = int32(-1)]; + fp16 const_916_promoted = const()[name = string("const_916_promoted"), val = fp16(-0x1p+0)]; + tensor var_16400 = mul(x = var_16339, y = const_916_promoted)[name = string("op_16400")]; + bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; + tensor input_485 = concat(axis = var_16398, interleave = input_485_interleave_0, values = (var_16339, var_16400))[name = string("input_485")]; + tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; + fp16 var_16395_to_fp16 = const()[name = string("op_16395_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_16395_to_fp16, x = input_485)[name = string("normed_581_cast_fp16")]; + tensor normed_583_begin_0 = const()[name = string("normed_583_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_583_end_0 = const()[name = string("normed_583_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_583_end_mask_0 = const()[name = string("normed_583_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_583 = slice_by_index(begin = normed_583_begin_0, end = normed_583_end_0, end_mask = normed_583_end_mask_0, x = normed_581_cast_fp16)[name = string("normed_583")]; + tensor var_16414_to_fp16 = const()[name = string("op_16414_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851985920)))]; + tensor q_49_cast_fp16 = mul(x = normed_583, y = var_16414_to_fp16)[name = string("q_49_cast_fp16")]; + int32 var_16425 = const()[name = string("op_16425"), val = int32(-1)]; + fp16 const_920_promoted = const()[name = string("const_920_promoted"), val = fp16(-0x1p+0)]; + tensor var_16427 = mul(x = var_16361, y = const_920_promoted)[name = string("op_16427")]; + bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; + tensor input_487 = concat(axis = var_16425, interleave = input_487_interleave_0, values = (var_16361, var_16427))[name = string("input_487")]; + tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; + fp16 var_16422_to_fp16 = const()[name = string("op_16422_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_16422_to_fp16, x = input_487)[name = string("normed_585_cast_fp16")]; + tensor normed_587_begin_0 = const()[name = string("normed_587_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_587_end_0 = const()[name = string("normed_587_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_587_end_mask_0 = const()[name = string("normed_587_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_587 = slice_by_index(begin = normed_587_begin_0, end = normed_587_end_0, end_mask = normed_587_end_mask_0, x = normed_585_cast_fp16)[name = string("normed_587")]; + tensor var_16441_to_fp16 = const()[name = string("op_16441_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851986496)))]; + tensor k_49_cast_fp16 = mul(x = normed_587, y = var_16441_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_16443_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_16443_cast_fp16")]; + tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; + fp16 const_926_promoted_to_fp16 = const()[name = string("const_926_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16464_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_926_promoted_to_fp16)[name = string("op_16464_cast_fp16")]; + int32 var_16466 = const()[name = string("op_16466"), val = int32(-1)]; + bool var_16467_interleave_0 = const()[name = string("op_16467_interleave_0"), val = bool(false)]; + tensor var_16467_cast_fp16 = concat(axis = var_16466, interleave = var_16467_interleave_0, values = (var_16464_cast_fp16, x1_97_cast_fp16))[name = string("op_16467_cast_fp16")]; + tensor var_16468_cast_fp16 = mul(x = var_16467_cast_fp16, y = sin_1_cast_fp16)[name = string("op_16468_cast_fp16")]; + tensor query_states_97_cast_fp16 = add(x = var_16443_cast_fp16, y = var_16468_cast_fp16)[name = string("query_states_97_cast_fp16")]; + tensor var_16471_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_16471_cast_fp16")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; + fp16 const_929_promoted_to_fp16 = const()[name = string("const_929_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16492_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_929_promoted_to_fp16)[name = string("op_16492_cast_fp16")]; + int32 var_16494 = const()[name = string("op_16494"), val = int32(-1)]; + bool var_16495_interleave_0 = const()[name = string("op_16495_interleave_0"), val = bool(false)]; + tensor var_16495_cast_fp16 = concat(axis = var_16494, interleave = var_16495_interleave_0, values = (var_16492_cast_fp16, x1_99_cast_fp16))[name = string("op_16495_cast_fp16")]; + tensor var_16496_cast_fp16 = mul(x = var_16495_cast_fp16, y = sin_1_cast_fp16)[name = string("op_16496_cast_fp16")]; + tensor key_states_97_cast_fp16 = add(x = var_16471_cast_fp16, y = var_16496_cast_fp16)[name = string("key_states_97_cast_fp16")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([20])]; + tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; + tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; + tensor expand_dims_292 = const()[name = string("expand_dims_292"), val = tensor([21])]; + int32 concat_194_axis_0 = const()[name = string("concat_194_axis_0"), val = int32(0)]; + bool concat_194_interleave_0 = const()[name = string("concat_194_interleave_0"), val = bool(false)]; + tensor concat_194 = concat(axis = concat_194_axis_0, interleave = concat_194_interleave_0, values = (expand_dims_288, expand_dims_289, current_pos, expand_dims_291))[name = string("concat_194")]; + tensor concat_195_values1_0 = const()[name = string("concat_195_values1_0"), val = tensor([0])]; + tensor concat_195_values3_0 = const()[name = string("concat_195_values3_0"), val = tensor([0])]; + int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; + bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; + tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (expand_dims_292, concat_195_values1_0, var_1955, concat_195_values3_0))[name = string("concat_195")]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_194, begin_mask = model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0, end = concat_195, end_mask = model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_41_stride_0, update = key_states_97_cast_fp16, x = coreml_update_state_97)[name = string("model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_48_write_state")]; + tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_48")]; + tensor expand_dims_294 = const()[name = string("expand_dims_294"), val = tensor([42])]; + tensor expand_dims_295 = const()[name = string("expand_dims_295"), val = tensor([0])]; + tensor expand_dims_297 = const()[name = string("expand_dims_297"), val = tensor([0])]; + tensor expand_dims_298 = const()[name = string("expand_dims_298"), val = tensor([43])]; + int32 concat_198_axis_0 = const()[name = string("concat_198_axis_0"), val = int32(0)]; + bool concat_198_interleave_0 = const()[name = string("concat_198_interleave_0"), val = bool(false)]; + tensor concat_198 = concat(axis = concat_198_axis_0, interleave = concat_198_interleave_0, values = (expand_dims_294, expand_dims_295, current_pos, expand_dims_297))[name = string("concat_198")]; + tensor concat_199_values1_0 = const()[name = string("concat_199_values1_0"), val = tensor([0])]; + tensor concat_199_values3_0 = const()[name = string("concat_199_values3_0"), val = tensor([0])]; + int32 concat_199_axis_0 = const()[name = string("concat_199_axis_0"), val = int32(0)]; + bool concat_199_interleave_0 = const()[name = string("concat_199_interleave_0"), val = bool(false)]; + tensor concat_199 = concat(axis = concat_199_axis_0, interleave = concat_199_interleave_0, values = (expand_dims_298, concat_199_values1_0, var_1955, concat_199_values3_0))[name = string("concat_199")]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_198, begin_mask = model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0, end = concat_199, end_mask = model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_42_stride_0, update = var_16383, x = coreml_update_state_100)[name = string("model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_49_write_state")]; + tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_49")]; + tensor var_16551_begin_0 = const()[name = string("op_16551_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_16551_end_0 = const()[name = string("op_16551_end_0"), val = tensor([21, 1, 512, 256])]; + tensor var_16551_end_mask_0 = const()[name = string("op_16551_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16551_cast_fp16 = slice_by_index(begin = var_16551_begin_0, end = var_16551_end_0, end_mask = var_16551_end_mask_0, x = coreml_update_state_101)[name = string("op_16551_cast_fp16")]; + tensor var_16558_begin_0 = const()[name = string("op_16558_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor var_16558_end_0 = const()[name = string("op_16558_end_0"), val = tensor([43, 1, 512, 256])]; + tensor var_16558_end_mask_0 = const()[name = string("op_16558_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16558_cast_fp16 = slice_by_index(begin = var_16558_begin_0, end = var_16558_end_0, end_mask = var_16558_end_mask_0, x = coreml_update_state_101)[name = string("op_16558_cast_fp16")]; + tensor var_16595 = const()[name = string("op_16595"), val = tensor([1, 4, 1, 1])]; + tensor x_389_cast_fp16 = tile(reps = var_16595, x = var_16551_cast_fp16)[name = string("x_389_cast_fp16")]; + tensor var_16615 = const()[name = string("op_16615"), val = tensor([1, 4, 1, 1])]; + tensor x_395_cast_fp16 = tile(reps = var_16615, x = var_16558_cast_fp16)[name = string("x_395_cast_fp16")]; + bool var_16642_transpose_x_1 = const()[name = string("op_16642_transpose_x_1"), val = bool(false)]; + bool var_16642_transpose_y_1 = const()[name = string("op_16642_transpose_y_1"), val = bool(true)]; + tensor var_16642 = matmul(transpose_x = var_16642_transpose_x_1, transpose_y = var_16642_transpose_y_1, x = query_states_97_cast_fp16, y = x_389_cast_fp16)[name = string("op_16642")]; + fp16 var_16643_to_fp16 = const()[name = string("op_16643_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_145_cast_fp16 = mul(x = var_16642, y = var_16643_to_fp16)[name = string("attn_weights_145_cast_fp16")]; + tensor attn_weights_147_cast_fp16 = add(x = attn_weights_145_cast_fp16, y = var_2129)[name = string("attn_weights_147_cast_fp16")]; + int32 var_16678 = const()[name = string("op_16678"), val = int32(-1)]; + tensor attn_weights_149_cast_fp16 = softmax(axis = var_16678, x = attn_weights_147_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; + bool attn_output_241_transpose_x_0 = const()[name = string("attn_output_241_transpose_x_0"), val = bool(false)]; + bool attn_output_241_transpose_y_0 = const()[name = string("attn_output_241_transpose_y_0"), val = bool(false)]; + tensor attn_output_241_cast_fp16 = matmul(transpose_x = attn_output_241_transpose_x_0, transpose_y = attn_output_241_transpose_y_0, x = attn_weights_149_cast_fp16, y = x_395_cast_fp16)[name = string("attn_output_241_cast_fp16")]; + tensor var_16689_perm_0 = const()[name = string("op_16689_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_16693 = const()[name = string("op_16693"), val = tensor([1, 1, 1024])]; + tensor var_16689_cast_fp16 = transpose(perm = var_16689_perm_0, x = attn_output_241_cast_fp16)[name = string("transpose_27")]; + tensor attn_output_245_cast_fp16 = reshape(shape = var_16693, x = var_16689_cast_fp16)[name = string("attn_output_245_cast_fp16")]; + tensor var_16698 = const()[name = string("op_16698"), val = tensor([0, 2, 1])]; + string var_16714_pad_type_0 = const()[name = string("op_16714_pad_type_0"), val = string("valid")]; + int32 var_16714_groups_0 = const()[name = string("op_16714_groups_0"), val = int32(1)]; + tensor var_16714_strides_0 = const()[name = string("op_16714_strides_0"), val = tensor([1])]; + tensor var_16714_pad_0 = const()[name = string("op_16714_pad_0"), val = tensor([0, 0])]; + tensor var_16714_dilations_0 = const()[name = string("op_16714_dilations_0"), val = tensor([1])]; + tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851987072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852871872))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_16699_cast_fp16 = transpose(perm = var_16698, x = attn_output_245_cast_fp16)[name = string("transpose_26")]; + tensor var_16714_cast_fp16 = conv(dilations = var_16714_dilations_0, groups = var_16714_groups_0, pad = var_16714_pad_0, pad_type = var_16714_pad_type_0, strides = var_16714_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_16699_cast_fp16)[name = string("op_16714_cast_fp16")]; + tensor var_16718 = const()[name = string("op_16718"), val = tensor([0, 2, 1])]; + int32 var_16729 = const()[name = string("op_16729"), val = int32(-1)]; + fp16 const_938_promoted_to_fp16 = const()[name = string("const_938_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_397_cast_fp16 = transpose(perm = var_16718, x = var_16714_cast_fp16)[name = string("transpose_25")]; + tensor var_16731_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = const_938_promoted_to_fp16)[name = string("op_16731_cast_fp16")]; + bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; + tensor input_491_cast_fp16 = concat(axis = var_16729, interleave = input_491_interleave_0, values = (hidden_states_397_cast_fp16, var_16731_cast_fp16))[name = string("input_491_cast_fp16")]; + tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; + fp16 var_16726_to_fp16 = const()[name = string("op_16726_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_16726_to_fp16, x = input_491_cast_fp16)[name = string("normed_589_cast_fp16")]; + tensor normed_591_begin_0 = const()[name = string("normed_591_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_591_end_0 = const()[name = string("normed_591_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_591_end_mask_0 = const()[name = string("normed_591_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_591_cast_fp16 = slice_by_index(begin = normed_591_begin_0, end = normed_591_end_0, end_mask = normed_591_end_mask_0, x = normed_589_cast_fp16)[name = string("normed_591_cast_fp16")]; + tensor var_16745_to_fp16 = const()[name = string("op_16745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852908800)))]; + tensor attn_output_249_cast_fp16 = mul(x = normed_591_cast_fp16, y = var_16745_to_fp16)[name = string("attn_output_249_cast_fp16")]; + tensor hidden_states_399_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_399_cast_fp16")]; + int32 var_16758 = const()[name = string("op_16758"), val = int32(-1)]; + fp16 const_942_promoted_to_fp16 = const()[name = string("const_942_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16760_cast_fp16 = mul(x = hidden_states_399_cast_fp16, y = const_942_promoted_to_fp16)[name = string("op_16760_cast_fp16")]; + bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; + tensor input_493_cast_fp16 = concat(axis = var_16758, interleave = input_493_interleave_0, values = (hidden_states_399_cast_fp16, var_16760_cast_fp16))[name = string("input_493_cast_fp16")]; + tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; + fp16 var_16755_to_fp16 = const()[name = string("op_16755_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_16755_to_fp16, x = input_493_cast_fp16)[name = string("normed_593_cast_fp16")]; + tensor normed_595_begin_0 = const()[name = string("normed_595_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_595_end_0 = const()[name = string("normed_595_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_595_end_mask_0 = const()[name = string("normed_595_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_595_cast_fp16 = slice_by_index(begin = normed_595_begin_0, end = normed_595_end_0, end_mask = normed_595_end_mask_0, x = normed_593_cast_fp16)[name = string("normed_595_cast_fp16")]; + tensor var_16774_to_fp16 = const()[name = string("op_16774_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852911168)))]; + tensor x_397_cast_fp16 = mul(x = normed_595_cast_fp16, y = var_16774_to_fp16)[name = string("x_397_cast_fp16")]; + tensor var_16786 = const()[name = string("op_16786"), val = tensor([0, 2, 1])]; + tensor input_495_axes_0 = const()[name = string("input_495_axes_0"), val = tensor([2])]; + tensor var_16787_cast_fp16 = transpose(perm = var_16786, x = x_397_cast_fp16)[name = string("transpose_24")]; + tensor input_495_cast_fp16 = expand_dims(axes = input_495_axes_0, x = var_16787_cast_fp16)[name = string("input_495_cast_fp16")]; + string x_399_pad_type_0 = const()[name = string("x_399_pad_type_0"), val = string("valid")]; + tensor x_399_strides_0 = const()[name = string("x_399_strides_0"), val = tensor([1, 1])]; + tensor x_399_pad_0 = const()[name = string("x_399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_399_dilations_0 = const()[name = string("x_399_dilations_0"), val = tensor([1, 1])]; + int32 x_399_groups_0 = const()[name = string("x_399_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852913536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858885568))))[name = string("model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_399_cast_fp16 = conv(dilations = x_399_dilations_0, groups = x_399_groups_0, pad = x_399_pad_0, pad_type = x_399_pad_type_0, strides = x_399_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("x_399_cast_fp16")]; + string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; + tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; + tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; + int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859106816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865078848))))[name = string("model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_49_cast_fp16 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("b_49_cast_fp16")]; + string var_16812_mode_0 = const()[name = string("op_16812_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_16812_cast_fp16 = gelu(mode = var_16812_mode_0, x = x_399_cast_fp16)[name = string("op_16812_cast_fp16")]; + tensor input_497_cast_fp16 = mul(x = var_16812_cast_fp16, y = b_49_cast_fp16)[name = string("input_497_cast_fp16")]; + string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; + tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; + tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; + int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865300096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871272128))))[name = string("model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_49_cast_fp16 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_497_cast_fp16)[name = string("e_49_cast_fp16")]; + tensor var_16820_axes_0 = const()[name = string("op_16820_axes_0"), val = tensor([2])]; + tensor var_16820_cast_fp16 = squeeze(axes = var_16820_axes_0, x = e_49_cast_fp16)[name = string("op_16820_cast_fp16")]; + tensor var_16821 = const()[name = string("op_16821"), val = tensor([0, 2, 1])]; + int32 var_16832 = const()[name = string("op_16832"), val = int32(-1)]; + fp16 const_946_promoted_to_fp16 = const()[name = string("const_946_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_401_cast_fp16 = transpose(perm = var_16821, x = var_16820_cast_fp16)[name = string("transpose_23")]; + tensor var_16834_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = const_946_promoted_to_fp16)[name = string("op_16834_cast_fp16")]; + bool input_499_interleave_0 = const()[name = string("input_499_interleave_0"), val = bool(false)]; + tensor input_499_cast_fp16 = concat(axis = var_16832, interleave = input_499_interleave_0, values = (hidden_states_401_cast_fp16, var_16834_cast_fp16))[name = string("input_499_cast_fp16")]; + tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; + fp16 var_16829_to_fp16 = const()[name = string("op_16829_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_16829_to_fp16, x = input_499_cast_fp16)[name = string("normed_597_cast_fp16")]; + tensor normed_599_begin_0 = const()[name = string("normed_599_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_599_end_0 = const()[name = string("normed_599_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_599_end_mask_0 = const()[name = string("normed_599_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_599_cast_fp16 = slice_by_index(begin = normed_599_begin_0, end = normed_599_end_0, end_mask = normed_599_end_mask_0, x = normed_597_cast_fp16)[name = string("normed_599_cast_fp16")]; + tensor var_16848_to_fp16 = const()[name = string("op_16848_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871309056)))]; + tensor hidden_states_403_cast_fp16 = mul(x = normed_599_cast_fp16, y = var_16848_to_fp16)[name = string("hidden_states_403_cast_fp16")]; + tensor hidden_states_405_cast_fp16 = add(x = hidden_states_399_cast_fp16, y = hidden_states_403_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; + int32 var_16899 = const()[name = string("op_16899"), val = int32(-1)]; + fp16 const_950_promoted_to_fp16 = const()[name = string("const_950_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16901_cast_fp16 = mul(x = hidden_states_405_cast_fp16, y = const_950_promoted_to_fp16)[name = string("op_16901_cast_fp16")]; + bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; + tensor input_501_cast_fp16 = concat(axis = var_16899, interleave = input_501_interleave_0, values = (hidden_states_405_cast_fp16, var_16901_cast_fp16))[name = string("input_501_cast_fp16")]; + tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; + fp16 var_16896_to_fp16 = const()[name = string("op_16896_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_16896_to_fp16, x = input_501_cast_fp16)[name = string("normed_601_cast_fp16")]; + tensor normed_603_begin_0 = const()[name = string("normed_603_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_603_end_0 = const()[name = string("normed_603_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_603_end_mask_0 = const()[name = string("normed_603_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_603_cast_fp16 = slice_by_index(begin = normed_603_begin_0, end = normed_603_end_0, end_mask = normed_603_end_mask_0, x = normed_601_cast_fp16)[name = string("normed_603_cast_fp16")]; + tensor var_16915_to_fp16 = const()[name = string("op_16915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871311424)))]; + tensor hidden_states_407_cast_fp16 = mul(x = normed_603_cast_fp16, y = var_16915_to_fp16)[name = string("hidden_states_407_cast_fp16")]; + tensor var_16920 = const()[name = string("op_16920"), val = tensor([0, 2, 1])]; + tensor var_16923_axes_0 = const()[name = string("op_16923_axes_0"), val = tensor([2])]; + tensor var_16921_cast_fp16 = transpose(perm = var_16920, x = hidden_states_407_cast_fp16)[name = string("transpose_22")]; + tensor var_16923_cast_fp16 = expand_dims(axes = var_16923_axes_0, x = var_16921_cast_fp16)[name = string("op_16923_cast_fp16")]; + string var_16939_pad_type_0 = const()[name = string("op_16939_pad_type_0"), val = string("valid")]; + tensor var_16939_strides_0 = const()[name = string("op_16939_strides_0"), val = tensor([1, 1])]; + tensor var_16939_pad_0 = const()[name = string("op_16939_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16939_dilations_0 = const()[name = string("op_16939_dilations_0"), val = tensor([1, 1])]; + int32 var_16939_groups_0 = const()[name = string("op_16939_groups_0"), val = int32(1)]; + tensor var_16939 = conv(dilations = var_16939_dilations_0, groups = var_16939_groups_0, pad = var_16939_pad_0, pad_type = var_16939_pad_type_0, strides = var_16939_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_16923_cast_fp16)[name = string("op_16939")]; + tensor var_16944 = const()[name = string("op_16944"), val = tensor([1, 4, 1, 256])]; + tensor var_16945 = reshape(shape = var_16944, x = var_16939)[name = string("op_16945")]; + string var_16961_pad_type_0 = const()[name = string("op_16961_pad_type_0"), val = string("valid")]; + tensor var_16961_strides_0 = const()[name = string("op_16961_strides_0"), val = tensor([1, 1])]; + tensor var_16961_pad_0 = const()[name = string("op_16961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16961_dilations_0 = const()[name = string("op_16961_dilations_0"), val = tensor([1, 1])]; + int32 var_16961_groups_0 = const()[name = string("op_16961_groups_0"), val = int32(1)]; + tensor var_16961 = conv(dilations = var_16961_dilations_0, groups = var_16961_groups_0, pad = var_16961_pad_0, pad_type = var_16961_pad_type_0, strides = var_16961_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_16923_cast_fp16)[name = string("op_16961")]; + tensor var_16966 = const()[name = string("op_16966"), val = tensor([1, 1, 1, 256])]; + tensor var_16967 = reshape(shape = var_16966, x = var_16961)[name = string("op_16967")]; + string var_16983_pad_type_0 = const()[name = string("op_16983_pad_type_0"), val = string("valid")]; + tensor var_16983_strides_0 = const()[name = string("op_16983_strides_0"), val = tensor([1, 1])]; + tensor var_16983_pad_0 = const()[name = string("op_16983_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16983_dilations_0 = const()[name = string("op_16983_dilations_0"), val = tensor([1, 1])]; + int32 var_16983_groups_0 = const()[name = string("op_16983_groups_0"), val = int32(1)]; + tensor var_16983 = conv(dilations = var_16983_dilations_0, groups = var_16983_groups_0, pad = var_16983_pad_0, pad_type = var_16983_pad_type_0, strides = var_16983_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_16923_cast_fp16)[name = string("op_16983")]; + tensor var_16988 = const()[name = string("op_16988"), val = tensor([1, 1, 1, 256])]; + tensor var_16989 = reshape(shape = var_16988, x = var_16983)[name = string("op_16989")]; + int32 var_17004 = const()[name = string("op_17004"), val = int32(-1)]; + fp16 const_954_promoted = const()[name = string("const_954_promoted"), val = fp16(-0x1p+0)]; + tensor var_17006 = mul(x = var_16945, y = const_954_promoted)[name = string("op_17006")]; + bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; + tensor input_505 = concat(axis = var_17004, interleave = input_505_interleave_0, values = (var_16945, var_17006))[name = string("input_505")]; + tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; + fp16 var_17001_to_fp16 = const()[name = string("op_17001_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_17001_to_fp16, x = input_505)[name = string("normed_605_cast_fp16")]; + tensor normed_607_begin_0 = const()[name = string("normed_607_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_607_end_0 = const()[name = string("normed_607_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_607_end_mask_0 = const()[name = string("normed_607_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_607 = slice_by_index(begin = normed_607_begin_0, end = normed_607_end_0, end_mask = normed_607_end_mask_0, x = normed_605_cast_fp16)[name = string("normed_607")]; + tensor var_17020_to_fp16 = const()[name = string("op_17020_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871313792)))]; + tensor q_cast_fp16 = mul(x = normed_607, y = var_17020_to_fp16)[name = string("q_cast_fp16")]; + int32 var_17031 = const()[name = string("op_17031"), val = int32(-1)]; + fp16 const_958_promoted = const()[name = string("const_958_promoted"), val = fp16(-0x1p+0)]; + tensor var_17033 = mul(x = var_16967, y = const_958_promoted)[name = string("op_17033")]; + bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; + tensor input_507 = concat(axis = var_17031, interleave = input_507_interleave_0, values = (var_16967, var_17033))[name = string("input_507")]; + tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; + fp16 var_17028_to_fp16 = const()[name = string("op_17028_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_17028_to_fp16, x = input_507)[name = string("normed_609_cast_fp16")]; + tensor normed_611_begin_0 = const()[name = string("normed_611_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_611_end_0 = const()[name = string("normed_611_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_611_end_mask_0 = const()[name = string("normed_611_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_611 = slice_by_index(begin = normed_611_begin_0, end = normed_611_end_0, end_mask = normed_611_end_mask_0, x = normed_609_cast_fp16)[name = string("normed_611")]; + tensor var_17047_to_fp16 = const()[name = string("op_17047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871314368)))]; + tensor k_cast_fp16 = mul(x = normed_611, y = var_17047_to_fp16)[name = string("k_cast_fp16")]; + tensor var_17049_cast_fp16 = mul(x = q_cast_fp16, y = cos_1_cast_fp16)[name = string("op_17049_cast_fp16")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_cast_fp16)[name = string("x1_101_cast_fp16")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_cast_fp16)[name = string("x2_101_cast_fp16")]; + fp16 const_964_promoted_to_fp16 = const()[name = string("const_964_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17070_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_964_promoted_to_fp16)[name = string("op_17070_cast_fp16")]; + int32 var_17072 = const()[name = string("op_17072"), val = int32(-1)]; + bool var_17073_interleave_0 = const()[name = string("op_17073_interleave_0"), val = bool(false)]; + tensor var_17073_cast_fp16 = concat(axis = var_17072, interleave = var_17073_interleave_0, values = (var_17070_cast_fp16, x1_101_cast_fp16))[name = string("op_17073_cast_fp16")]; + tensor var_17074_cast_fp16 = mul(x = var_17073_cast_fp16, y = sin_1_cast_fp16)[name = string("op_17074_cast_fp16")]; + tensor query_states_101_cast_fp16 = add(x = var_17049_cast_fp16, y = var_17074_cast_fp16)[name = string("query_states_101_cast_fp16")]; + tensor var_17077_cast_fp16 = mul(x = k_cast_fp16, y = cos_1_cast_fp16)[name = string("op_17077_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; + fp16 const_967_promoted_to_fp16 = const()[name = string("const_967_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17098_cast_fp16 = mul(x = x2_cast_fp16, y = const_967_promoted_to_fp16)[name = string("op_17098_cast_fp16")]; + int32 var_17100 = const()[name = string("op_17100"), val = int32(-1)]; + bool var_17101_interleave_0 = const()[name = string("op_17101_interleave_0"), val = bool(false)]; + tensor var_17101_cast_fp16 = concat(axis = var_17100, interleave = var_17101_interleave_0, values = (var_17098_cast_fp16, x1_cast_fp16))[name = string("op_17101_cast_fp16")]; + tensor var_17102_cast_fp16 = mul(x = var_17101_cast_fp16, y = sin_1_cast_fp16)[name = string("op_17102_cast_fp16")]; + tensor key_states_101_cast_fp16 = add(x = var_17077_cast_fp16, y = var_17102_cast_fp16)[name = string("key_states_101_cast_fp16")]; + tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([21])]; + tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; + tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; + tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([22])]; + int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; + bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; + tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (expand_dims_300, expand_dims_301, current_pos, expand_dims_303))[name = string("concat_202")]; + tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; + tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; + int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; + bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (expand_dims_304, concat_203_values1_0, var_1955, concat_203_values3_0))[name = string("concat_203")]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_202, begin_mask = model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0, end = concat_203, end_mask = model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_43_stride_0, update = key_states_101_cast_fp16, x = coreml_update_state_101)[name = string("model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_50_write_state")]; + tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_50")]; + tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([43])]; + tensor expand_dims_307 = const()[name = string("expand_dims_307"), val = tensor([0])]; + tensor expand_dims_309 = const()[name = string("expand_dims_309"), val = tensor([0])]; + tensor expand_dims_310 = const()[name = string("expand_dims_310"), val = tensor([44])]; + int32 concat_206_axis_0 = const()[name = string("concat_206_axis_0"), val = int32(0)]; + bool concat_206_interleave_0 = const()[name = string("concat_206_interleave_0"), val = bool(false)]; + tensor concat_206 = concat(axis = concat_206_axis_0, interleave = concat_206_interleave_0, values = (expand_dims_306, expand_dims_307, current_pos, expand_dims_309))[name = string("concat_206")]; + tensor concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = tensor([0])]; + tensor concat_207_values3_0 = const()[name = string("concat_207_values3_0"), val = tensor([0])]; + int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; + bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; + tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (expand_dims_310, concat_207_values1_0, var_1955, concat_207_values3_0))[name = string("concat_207")]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_206, begin_mask = model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0, end = concat_207, end_mask = model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_44_stride_0, update = var_16989, x = coreml_update_state_102)[name = string("model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_51_write_state")]; + tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_51")]; + tensor var_17157_begin_0 = const()[name = string("op_17157_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_17157_end_0 = const()[name = string("op_17157_end_0"), val = tensor([22, 1, 512, 256])]; + tensor var_17157_end_mask_0 = const()[name = string("op_17157_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_17157_cast_fp16 = slice_by_index(begin = var_17157_begin_0, end = var_17157_end_0, end_mask = var_17157_end_mask_0, x = coreml_update_state_103)[name = string("op_17157_cast_fp16")]; + tensor var_17164_begin_0 = const()[name = string("op_17164_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor var_17164_end_0 = const()[name = string("op_17164_end_0"), val = tensor([1, 1, 512, 256])]; + tensor var_17164_end_mask_0 = const()[name = string("op_17164_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_17164_cast_fp16 = slice_by_index(begin = var_17164_begin_0, end = var_17164_end_0, end_mask = var_17164_end_mask_0, x = coreml_update_state_103)[name = string("op_17164_cast_fp16")]; + tensor var_17201 = const()[name = string("op_17201"), val = tensor([1, 4, 1, 1])]; + tensor x_405_cast_fp16 = tile(reps = var_17201, x = var_17157_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_17221 = const()[name = string("op_17221"), val = tensor([1, 4, 1, 1])]; + tensor x_411_cast_fp16 = tile(reps = var_17221, x = var_17164_cast_fp16)[name = string("x_411_cast_fp16")]; + bool var_17248_transpose_x_1 = const()[name = string("op_17248_transpose_x_1"), val = bool(false)]; + bool var_17248_transpose_y_1 = const()[name = string("op_17248_transpose_y_1"), val = bool(true)]; + tensor var_17248 = matmul(transpose_x = var_17248_transpose_x_1, transpose_y = var_17248_transpose_y_1, x = query_states_101_cast_fp16, y = x_405_cast_fp16)[name = string("op_17248")]; + fp16 var_17249_to_fp16 = const()[name = string("op_17249_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_151_cast_fp16 = mul(x = var_17248, y = var_17249_to_fp16)[name = string("attn_weights_151_cast_fp16")]; + tensor attn_weights_153_cast_fp16 = add(x = attn_weights_151_cast_fp16, y = var_2129)[name = string("attn_weights_153_cast_fp16")]; + int32 var_17284 = const()[name = string("op_17284"), val = int32(-1)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_17284, x = attn_weights_153_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_251_transpose_x_0 = const()[name = string("attn_output_251_transpose_x_0"), val = bool(false)]; + bool attn_output_251_transpose_y_0 = const()[name = string("attn_output_251_transpose_y_0"), val = bool(false)]; + tensor attn_output_251_cast_fp16 = matmul(transpose_x = attn_output_251_transpose_x_0, transpose_y = attn_output_251_transpose_y_0, x = attn_weights_cast_fp16, y = x_411_cast_fp16)[name = string("attn_output_251_cast_fp16")]; + tensor var_17295_perm_0 = const()[name = string("op_17295_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_17299 = const()[name = string("op_17299"), val = tensor([1, 1, 1024])]; + tensor var_17295_cast_fp16 = transpose(perm = var_17295_perm_0, x = attn_output_251_cast_fp16)[name = string("transpose_21")]; + tensor attn_output_255_cast_fp16 = reshape(shape = var_17299, x = var_17295_cast_fp16)[name = string("attn_output_255_cast_fp16")]; + tensor var_17304 = const()[name = string("op_17304"), val = tensor([0, 2, 1])]; + string var_17320_pad_type_0 = const()[name = string("op_17320_pad_type_0"), val = string("valid")]; + int32 var_17320_groups_0 = const()[name = string("op_17320_groups_0"), val = int32(1)]; + tensor var_17320_strides_0 = const()[name = string("op_17320_strides_0"), val = tensor([1])]; + tensor var_17320_pad_0 = const()[name = string("op_17320_pad_0"), val = tensor([0, 0])]; + tensor var_17320_dilations_0 = const()[name = string("op_17320_dilations_0"), val = tensor([1])]; + tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871314944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872199744))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_17305_cast_fp16 = transpose(perm = var_17304, x = attn_output_255_cast_fp16)[name = string("transpose_20")]; + tensor var_17320_cast_fp16 = conv(dilations = var_17320_dilations_0, groups = var_17320_groups_0, pad = var_17320_pad_0, pad_type = var_17320_pad_type_0, strides = var_17320_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_17305_cast_fp16)[name = string("op_17320_cast_fp16")]; + tensor var_17324 = const()[name = string("op_17324"), val = tensor([0, 2, 1])]; + int32 var_17335 = const()[name = string("op_17335"), val = int32(-1)]; + fp16 const_976_promoted_to_fp16 = const()[name = string("const_976_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_413_cast_fp16 = transpose(perm = var_17324, x = var_17320_cast_fp16)[name = string("transpose_19")]; + tensor var_17337_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = const_976_promoted_to_fp16)[name = string("op_17337_cast_fp16")]; + bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; + tensor input_511_cast_fp16 = concat(axis = var_17335, interleave = input_511_interleave_0, values = (hidden_states_413_cast_fp16, var_17337_cast_fp16))[name = string("input_511_cast_fp16")]; + tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; + fp16 var_17332_to_fp16 = const()[name = string("op_17332_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_17332_to_fp16, x = input_511_cast_fp16)[name = string("normed_613_cast_fp16")]; + tensor normed_615_begin_0 = const()[name = string("normed_615_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_615_end_0 = const()[name = string("normed_615_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_615_end_mask_0 = const()[name = string("normed_615_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_615_cast_fp16 = slice_by_index(begin = normed_615_begin_0, end = normed_615_end_0, end_mask = normed_615_end_mask_0, x = normed_613_cast_fp16)[name = string("normed_615_cast_fp16")]; + tensor var_17351_to_fp16 = const()[name = string("op_17351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872236672)))]; + tensor attn_output_cast_fp16 = mul(x = normed_615_cast_fp16, y = var_17351_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor hidden_states_415_cast_fp16 = add(x = hidden_states_405_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_415_cast_fp16")]; + int32 var_17364 = const()[name = string("op_17364"), val = int32(-1)]; + fp16 const_980_promoted_to_fp16 = const()[name = string("const_980_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17366_cast_fp16 = mul(x = hidden_states_415_cast_fp16, y = const_980_promoted_to_fp16)[name = string("op_17366_cast_fp16")]; + bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; + tensor input_513_cast_fp16 = concat(axis = var_17364, interleave = input_513_interleave_0, values = (hidden_states_415_cast_fp16, var_17366_cast_fp16))[name = string("input_513_cast_fp16")]; + tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; + fp16 var_17361_to_fp16 = const()[name = string("op_17361_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_17361_to_fp16, x = input_513_cast_fp16)[name = string("normed_617_cast_fp16")]; + tensor normed_619_begin_0 = const()[name = string("normed_619_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_619_end_0 = const()[name = string("normed_619_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_619_end_mask_0 = const()[name = string("normed_619_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_619_cast_fp16 = slice_by_index(begin = normed_619_begin_0, end = normed_619_end_0, end_mask = normed_619_end_mask_0, x = normed_617_cast_fp16)[name = string("normed_619_cast_fp16")]; + tensor var_17380_to_fp16 = const()[name = string("op_17380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872239040)))]; + tensor x_413_cast_fp16 = mul(x = normed_619_cast_fp16, y = var_17380_to_fp16)[name = string("x_413_cast_fp16")]; + tensor var_17392 = const()[name = string("op_17392"), val = tensor([0, 2, 1])]; + tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; + tensor var_17393_cast_fp16 = transpose(perm = var_17392, x = x_413_cast_fp16)[name = string("transpose_18")]; + tensor input_515_cast_fp16 = expand_dims(axes = input_515_axes_0, x = var_17393_cast_fp16)[name = string("input_515_cast_fp16")]; + string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; + tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; + tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; + int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872241408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878213440))))[name = string("model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_cast_fp16 = conv(dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("x_cast_fp16")]; + string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; + tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; + tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; + int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878434688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884406720))))[name = string("model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_cast_fp16 = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("b_cast_fp16")]; + string var_17418_mode_0 = const()[name = string("op_17418_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_17418_cast_fp16 = gelu(mode = var_17418_mode_0, x = x_cast_fp16)[name = string("op_17418_cast_fp16")]; + tensor input_517_cast_fp16 = mul(x = var_17418_cast_fp16, y = b_cast_fp16)[name = string("input_517_cast_fp16")]; + string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; + tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; + tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; + int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884627968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890600000))))[name = string("model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_cast_fp16 = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("e_cast_fp16")]; + tensor var_17426_axes_0 = const()[name = string("op_17426_axes_0"), val = tensor([2])]; + tensor var_17426_cast_fp16 = squeeze(axes = var_17426_axes_0, x = e_cast_fp16)[name = string("op_17426_cast_fp16")]; + tensor var_17427 = const()[name = string("op_17427"), val = tensor([0, 2, 1])]; + int32 var_17438 = const()[name = string("op_17438"), val = int32(-1)]; + fp16 const_984_promoted_to_fp16 = const()[name = string("const_984_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_417_cast_fp16 = transpose(perm = var_17427, x = var_17426_cast_fp16)[name = string("transpose_17")]; + tensor var_17440_cast_fp16 = mul(x = hidden_states_417_cast_fp16, y = const_984_promoted_to_fp16)[name = string("op_17440_cast_fp16")]; + bool input_519_interleave_0 = const()[name = string("input_519_interleave_0"), val = bool(false)]; + tensor input_519_cast_fp16 = concat(axis = var_17438, interleave = input_519_interleave_0, values = (hidden_states_417_cast_fp16, var_17440_cast_fp16))[name = string("input_519_cast_fp16")]; + tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; + fp16 var_17435_to_fp16 = const()[name = string("op_17435_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_17435_to_fp16, x = input_519_cast_fp16)[name = string("normed_621_cast_fp16")]; + tensor normed_623_begin_0 = const()[name = string("normed_623_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_623_end_0 = const()[name = string("normed_623_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_623_end_mask_0 = const()[name = string("normed_623_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_623_cast_fp16 = slice_by_index(begin = normed_623_begin_0, end = normed_623_end_0, end_mask = normed_623_end_mask_0, x = normed_621_cast_fp16)[name = string("normed_623_cast_fp16")]; + tensor var_17454_to_fp16 = const()[name = string("op_17454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890636928)))]; + tensor hidden_states_419_cast_fp16 = mul(x = normed_623_cast_fp16, y = var_17454_to_fp16)[name = string("hidden_states_419_cast_fp16")]; + tensor hidden_states_421_cast_fp16 = add(x = hidden_states_415_cast_fp16, y = hidden_states_419_cast_fp16)[name = string("hidden_states_421_cast_fp16")]; + int32 var_17467 = const()[name = string("op_17467"), val = int32(-1)]; + fp16 const_988_promoted_to_fp16 = const()[name = string("const_988_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17469_cast_fp16 = mul(x = hidden_states_421_cast_fp16, y = const_988_promoted_to_fp16)[name = string("op_17469_cast_fp16")]; + bool input_521_interleave_0 = const()[name = string("input_521_interleave_0"), val = bool(false)]; + tensor input_521_cast_fp16 = concat(axis = var_17467, interleave = input_521_interleave_0, values = (hidden_states_421_cast_fp16, var_17469_cast_fp16))[name = string("input_521_cast_fp16")]; + tensor normed_625_axes_0 = const()[name = string("normed_625_axes_0"), val = tensor([-1])]; + fp16 var_17464_to_fp16 = const()[name = string("op_17464_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_625_cast_fp16 = layer_norm(axes = normed_625_axes_0, epsilon = var_17464_to_fp16, x = input_521_cast_fp16)[name = string("normed_625_cast_fp16")]; + tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_625_cast_fp16)[name = string("normed_cast_fp16")]; + tensor var_17483_to_fp16 = const()[name = string("op_17483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890639296)))]; + tensor hidden_states_cast_fp16 = mul(x = normed_cast_fp16, y = var_17483_to_fp16)[name = string("hidden_states_cast_fp16")]; + tensor var_17488 = const()[name = string("op_17488"), val = tensor([0, 2, 1])]; + tensor input_axes_0 = const()[name = string("input_axes_0"), val = tensor([2])]; + tensor var_17489_cast_fp16 = transpose(perm = var_17488, x = hidden_states_cast_fp16)[name = string("transpose_16")]; + tensor input_cast_fp16 = expand_dims(axes = input_axes_0, x = var_17489_cast_fp16)[name = string("input_cast_fp16")]; + string var_17502_pad_type_0 = const()[name = string("op_17502_pad_type_0"), val = string("valid")]; + tensor var_17502_strides_0 = const()[name = string("op_17502_strides_0"), val = tensor([1, 1])]; + tensor var_17502_pad_0 = const()[name = string("op_17502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17502_dilations_0 = const()[name = string("op_17502_dilations_0"), val = tensor([1, 1])]; + int32 var_17502_groups_0 = const()[name = string("op_17502_groups_0"), val = int32(1)]; + tensor model_lm_head16_1_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890641664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(904797504))))[name = string("model_lm_head16_1_weight_promoted_to_fp16_palettized")]; + tensor var_17502_cast_fp16 = conv(dilations = var_17502_dilations_0, groups = var_17502_groups_0, pad = var_17502_pad_0, pad_type = var_17502_pad_type_0, strides = var_17502_strides_0, weight = model_lm_head16_1_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17502_cast_fp16")]; + tensor var_17504_axes_0 = const()[name = string("op_17504_axes_0"), val = tensor([2])]; + tensor var_17504_cast_fp16 = squeeze(axes = var_17504_axes_0, x = var_17502_cast_fp16)[name = string("op_17504_cast_fp16")]; + tensor logits_1_perm_0 = const()[name = string("logits_1_perm_0"), val = tensor([0, 2, 1])]; + string var_17518_pad_type_0 = const()[name = string("op_17518_pad_type_0"), val = string("valid")]; + tensor var_17518_strides_0 = const()[name = string("op_17518_strides_0"), val = tensor([1, 1])]; + tensor var_17518_pad_0 = const()[name = string("op_17518_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17518_dilations_0 = const()[name = string("op_17518_dilations_0"), val = tensor([1, 1])]; + int32 var_17518_groups_0 = const()[name = string("op_17518_groups_0"), val = int32(1)]; + tensor model_lm_head16_2_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905321856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(919477696))))[name = string("model_lm_head16_2_weight_promoted_to_fp16_palettized")]; + tensor var_17518_cast_fp16 = conv(dilations = var_17518_dilations_0, groups = var_17518_groups_0, pad = var_17518_pad_0, pad_type = var_17518_pad_type_0, strides = var_17518_strides_0, weight = model_lm_head16_2_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17518_cast_fp16")]; + tensor var_17520_axes_0 = const()[name = string("op_17520_axes_0"), val = tensor([2])]; + tensor var_17520_cast_fp16 = squeeze(axes = var_17520_axes_0, x = var_17518_cast_fp16)[name = string("op_17520_cast_fp16")]; + tensor logits_3_perm_0 = const()[name = string("logits_3_perm_0"), val = tensor([0, 2, 1])]; + string var_17534_pad_type_0 = const()[name = string("op_17534_pad_type_0"), val = string("valid")]; + tensor var_17534_strides_0 = const()[name = string("op_17534_strides_0"), val = tensor([1, 1])]; + tensor var_17534_pad_0 = const()[name = string("op_17534_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17534_dilations_0 = const()[name = string("op_17534_dilations_0"), val = tensor([1, 1])]; + int32 var_17534_groups_0 = const()[name = string("op_17534_groups_0"), val = int32(1)]; + tensor model_lm_head16_3_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920002048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934157888))))[name = string("model_lm_head16_3_weight_promoted_to_fp16_palettized")]; + tensor var_17534_cast_fp16 = conv(dilations = var_17534_dilations_0, groups = var_17534_groups_0, pad = var_17534_pad_0, pad_type = var_17534_pad_type_0, strides = var_17534_strides_0, weight = model_lm_head16_3_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17534_cast_fp16")]; + tensor var_17536_axes_0 = const()[name = string("op_17536_axes_0"), val = tensor([2])]; + tensor var_17536_cast_fp16 = squeeze(axes = var_17536_axes_0, x = var_17534_cast_fp16)[name = string("op_17536_cast_fp16")]; + tensor logits_5_perm_0 = const()[name = string("logits_5_perm_0"), val = tensor([0, 2, 1])]; + string var_17550_pad_type_0 = const()[name = string("op_17550_pad_type_0"), val = string("valid")]; + tensor var_17550_strides_0 = const()[name = string("op_17550_strides_0"), val = tensor([1, 1])]; + tensor var_17550_pad_0 = const()[name = string("op_17550_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17550_dilations_0 = const()[name = string("op_17550_dilations_0"), val = tensor([1, 1])]; + int32 var_17550_groups_0 = const()[name = string("op_17550_groups_0"), val = int32(1)]; + tensor model_lm_head16_4_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934682240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948838080))))[name = string("model_lm_head16_4_weight_promoted_to_fp16_palettized")]; + tensor var_17550_cast_fp16 = conv(dilations = var_17550_dilations_0, groups = var_17550_groups_0, pad = var_17550_pad_0, pad_type = var_17550_pad_type_0, strides = var_17550_strides_0, weight = model_lm_head16_4_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17550_cast_fp16")]; + tensor var_17552_axes_0 = const()[name = string("op_17552_axes_0"), val = tensor([2])]; + tensor var_17552_cast_fp16 = squeeze(axes = var_17552_axes_0, x = var_17550_cast_fp16)[name = string("op_17552_cast_fp16")]; + tensor logits_7_perm_0 = const()[name = string("logits_7_perm_0"), val = tensor([0, 2, 1])]; + string var_17566_pad_type_0 = const()[name = string("op_17566_pad_type_0"), val = string("valid")]; + tensor var_17566_strides_0 = const()[name = string("op_17566_strides_0"), val = tensor([1, 1])]; + tensor var_17566_pad_0 = const()[name = string("op_17566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17566_dilations_0 = const()[name = string("op_17566_dilations_0"), val = tensor([1, 1])]; + int32 var_17566_groups_0 = const()[name = string("op_17566_groups_0"), val = int32(1)]; + tensor model_lm_head16_5_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949362432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(963518272))))[name = string("model_lm_head16_5_weight_promoted_to_fp16_palettized")]; + tensor var_17566_cast_fp16 = conv(dilations = var_17566_dilations_0, groups = var_17566_groups_0, pad = var_17566_pad_0, pad_type = var_17566_pad_type_0, strides = var_17566_strides_0, weight = model_lm_head16_5_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17566_cast_fp16")]; + tensor var_17568_axes_0 = const()[name = string("op_17568_axes_0"), val = tensor([2])]; + tensor var_17568_cast_fp16 = squeeze(axes = var_17568_axes_0, x = var_17566_cast_fp16)[name = string("op_17568_cast_fp16")]; + tensor logits_9_perm_0 = const()[name = string("logits_9_perm_0"), val = tensor([0, 2, 1])]; + string var_17582_pad_type_0 = const()[name = string("op_17582_pad_type_0"), val = string("valid")]; + tensor var_17582_strides_0 = const()[name = string("op_17582_strides_0"), val = tensor([1, 1])]; + tensor var_17582_pad_0 = const()[name = string("op_17582_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17582_dilations_0 = const()[name = string("op_17582_dilations_0"), val = tensor([1, 1])]; + int32 var_17582_groups_0 = const()[name = string("op_17582_groups_0"), val = int32(1)]; + tensor model_lm_head16_6_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964042624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978198464))))[name = string("model_lm_head16_6_weight_promoted_to_fp16_palettized")]; + tensor var_17582_cast_fp16 = conv(dilations = var_17582_dilations_0, groups = var_17582_groups_0, pad = var_17582_pad_0, pad_type = var_17582_pad_type_0, strides = var_17582_strides_0, weight = model_lm_head16_6_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17582_cast_fp16")]; + tensor var_17584_axes_0 = const()[name = string("op_17584_axes_0"), val = tensor([2])]; + tensor var_17584_cast_fp16 = squeeze(axes = var_17584_axes_0, x = var_17582_cast_fp16)[name = string("op_17584_cast_fp16")]; + tensor logits_11_perm_0 = const()[name = string("logits_11_perm_0"), val = tensor([0, 2, 1])]; + string var_17598_pad_type_0 = const()[name = string("op_17598_pad_type_0"), val = string("valid")]; + tensor var_17598_strides_0 = const()[name = string("op_17598_strides_0"), val = tensor([1, 1])]; + tensor var_17598_pad_0 = const()[name = string("op_17598_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17598_dilations_0 = const()[name = string("op_17598_dilations_0"), val = tensor([1, 1])]; + int32 var_17598_groups_0 = const()[name = string("op_17598_groups_0"), val = int32(1)]; + tensor model_lm_head16_7_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978722816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(992878656))))[name = string("model_lm_head16_7_weight_promoted_to_fp16_palettized")]; + tensor var_17598_cast_fp16 = conv(dilations = var_17598_dilations_0, groups = var_17598_groups_0, pad = var_17598_pad_0, pad_type = var_17598_pad_type_0, strides = var_17598_strides_0, weight = model_lm_head16_7_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17598_cast_fp16")]; + tensor var_17600_axes_0 = const()[name = string("op_17600_axes_0"), val = tensor([2])]; + tensor var_17600_cast_fp16 = squeeze(axes = var_17600_axes_0, x = var_17598_cast_fp16)[name = string("op_17600_cast_fp16")]; + tensor logits_13_perm_0 = const()[name = string("logits_13_perm_0"), val = tensor([0, 2, 1])]; + string var_17614_pad_type_0 = const()[name = string("op_17614_pad_type_0"), val = string("valid")]; + tensor var_17614_strides_0 = const()[name = string("op_17614_strides_0"), val = tensor([1, 1])]; + tensor var_17614_pad_0 = const()[name = string("op_17614_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17614_dilations_0 = const()[name = string("op_17614_dilations_0"), val = tensor([1, 1])]; + int32 var_17614_groups_0 = const()[name = string("op_17614_groups_0"), val = int32(1)]; + tensor model_lm_head16_8_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(993403008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1007558848))))[name = string("model_lm_head16_8_weight_promoted_to_fp16_palettized")]; + tensor var_17614_cast_fp16 = conv(dilations = var_17614_dilations_0, groups = var_17614_groups_0, pad = var_17614_pad_0, pad_type = var_17614_pad_type_0, strides = var_17614_strides_0, weight = model_lm_head16_8_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17614_cast_fp16")]; + tensor var_17616_axes_0 = const()[name = string("op_17616_axes_0"), val = tensor([2])]; + tensor var_17616_cast_fp16 = squeeze(axes = var_17616_axes_0, x = var_17614_cast_fp16)[name = string("op_17616_cast_fp16")]; + tensor logits_15_perm_0 = const()[name = string("logits_15_perm_0"), val = tensor([0, 2, 1])]; + string var_17630_pad_type_0 = const()[name = string("op_17630_pad_type_0"), val = string("valid")]; + tensor var_17630_strides_0 = const()[name = string("op_17630_strides_0"), val = tensor([1, 1])]; + tensor var_17630_pad_0 = const()[name = string("op_17630_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17630_dilations_0 = const()[name = string("op_17630_dilations_0"), val = tensor([1, 1])]; + int32 var_17630_groups_0 = const()[name = string("op_17630_groups_0"), val = int32(1)]; + tensor model_lm_head16_9_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1008083200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1022239040))))[name = string("model_lm_head16_9_weight_promoted_to_fp16_palettized")]; + tensor var_17630_cast_fp16 = conv(dilations = var_17630_dilations_0, groups = var_17630_groups_0, pad = var_17630_pad_0, pad_type = var_17630_pad_type_0, strides = var_17630_strides_0, weight = model_lm_head16_9_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17630_cast_fp16")]; + tensor var_17632_axes_0 = const()[name = string("op_17632_axes_0"), val = tensor([2])]; + tensor var_17632_cast_fp16 = squeeze(axes = var_17632_axes_0, x = var_17630_cast_fp16)[name = string("op_17632_cast_fp16")]; + tensor logits_17_perm_0 = const()[name = string("logits_17_perm_0"), val = tensor([0, 2, 1])]; + string var_17646_pad_type_0 = const()[name = string("op_17646_pad_type_0"), val = string("valid")]; + tensor var_17646_strides_0 = const()[name = string("op_17646_strides_0"), val = tensor([1, 1])]; + tensor var_17646_pad_0 = const()[name = string("op_17646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17646_dilations_0 = const()[name = string("op_17646_dilations_0"), val = tensor([1, 1])]; + int32 var_17646_groups_0 = const()[name = string("op_17646_groups_0"), val = int32(1)]; + tensor model_lm_head16_10_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1022763392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036919232))))[name = string("model_lm_head16_10_weight_promoted_to_fp16_palettized")]; + tensor var_17646_cast_fp16 = conv(dilations = var_17646_dilations_0, groups = var_17646_groups_0, pad = var_17646_pad_0, pad_type = var_17646_pad_type_0, strides = var_17646_strides_0, weight = model_lm_head16_10_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17646_cast_fp16")]; + tensor var_17648_axes_0 = const()[name = string("op_17648_axes_0"), val = tensor([2])]; + tensor var_17648_cast_fp16 = squeeze(axes = var_17648_axes_0, x = var_17646_cast_fp16)[name = string("op_17648_cast_fp16")]; + tensor logits_19_perm_0 = const()[name = string("logits_19_perm_0"), val = tensor([0, 2, 1])]; + string var_17662_pad_type_0 = const()[name = string("op_17662_pad_type_0"), val = string("valid")]; + tensor var_17662_strides_0 = const()[name = string("op_17662_strides_0"), val = tensor([1, 1])]; + tensor var_17662_pad_0 = const()[name = string("op_17662_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17662_dilations_0 = const()[name = string("op_17662_dilations_0"), val = tensor([1, 1])]; + int32 var_17662_groups_0 = const()[name = string("op_17662_groups_0"), val = int32(1)]; + tensor model_lm_head16_11_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1037443584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1051599424))))[name = string("model_lm_head16_11_weight_promoted_to_fp16_palettized")]; + tensor var_17662_cast_fp16 = conv(dilations = var_17662_dilations_0, groups = var_17662_groups_0, pad = var_17662_pad_0, pad_type = var_17662_pad_type_0, strides = var_17662_strides_0, weight = model_lm_head16_11_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17662_cast_fp16")]; + tensor var_17664_axes_0 = const()[name = string("op_17664_axes_0"), val = tensor([2])]; + tensor var_17664_cast_fp16 = squeeze(axes = var_17664_axes_0, x = var_17662_cast_fp16)[name = string("op_17664_cast_fp16")]; + tensor logits_21_perm_0 = const()[name = string("logits_21_perm_0"), val = tensor([0, 2, 1])]; + string var_17678_pad_type_0 = const()[name = string("op_17678_pad_type_0"), val = string("valid")]; + tensor var_17678_strides_0 = const()[name = string("op_17678_strides_0"), val = tensor([1, 1])]; + tensor var_17678_pad_0 = const()[name = string("op_17678_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17678_dilations_0 = const()[name = string("op_17678_dilations_0"), val = tensor([1, 1])]; + int32 var_17678_groups_0 = const()[name = string("op_17678_groups_0"), val = int32(1)]; + tensor model_lm_head16_12_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052123776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066279616))))[name = string("model_lm_head16_12_weight_promoted_to_fp16_palettized")]; + tensor var_17678_cast_fp16 = conv(dilations = var_17678_dilations_0, groups = var_17678_groups_0, pad = var_17678_pad_0, pad_type = var_17678_pad_type_0, strides = var_17678_strides_0, weight = model_lm_head16_12_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17678_cast_fp16")]; + tensor var_17680_axes_0 = const()[name = string("op_17680_axes_0"), val = tensor([2])]; + tensor var_17680_cast_fp16 = squeeze(axes = var_17680_axes_0, x = var_17678_cast_fp16)[name = string("op_17680_cast_fp16")]; + tensor logits_23_perm_0 = const()[name = string("logits_23_perm_0"), val = tensor([0, 2, 1])]; + string var_17694_pad_type_0 = const()[name = string("op_17694_pad_type_0"), val = string("valid")]; + tensor var_17694_strides_0 = const()[name = string("op_17694_strides_0"), val = tensor([1, 1])]; + tensor var_17694_pad_0 = const()[name = string("op_17694_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17694_dilations_0 = const()[name = string("op_17694_dilations_0"), val = tensor([1, 1])]; + int32 var_17694_groups_0 = const()[name = string("op_17694_groups_0"), val = int32(1)]; + tensor model_lm_head16_13_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066803968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080959808))))[name = string("model_lm_head16_13_weight_promoted_to_fp16_palettized")]; + tensor var_17694_cast_fp16 = conv(dilations = var_17694_dilations_0, groups = var_17694_groups_0, pad = var_17694_pad_0, pad_type = var_17694_pad_type_0, strides = var_17694_strides_0, weight = model_lm_head16_13_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17694_cast_fp16")]; + tensor var_17696_axes_0 = const()[name = string("op_17696_axes_0"), val = tensor([2])]; + tensor var_17696_cast_fp16 = squeeze(axes = var_17696_axes_0, x = var_17694_cast_fp16)[name = string("op_17696_cast_fp16")]; + tensor logits_25_perm_0 = const()[name = string("logits_25_perm_0"), val = tensor([0, 2, 1])]; + string var_17710_pad_type_0 = const()[name = string("op_17710_pad_type_0"), val = string("valid")]; + tensor var_17710_strides_0 = const()[name = string("op_17710_strides_0"), val = tensor([1, 1])]; + tensor var_17710_pad_0 = const()[name = string("op_17710_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17710_dilations_0 = const()[name = string("op_17710_dilations_0"), val = tensor([1, 1])]; + int32 var_17710_groups_0 = const()[name = string("op_17710_groups_0"), val = int32(1)]; + tensor model_lm_head16_14_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1081484160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1095640000))))[name = string("model_lm_head16_14_weight_promoted_to_fp16_palettized")]; + tensor var_17710_cast_fp16 = conv(dilations = var_17710_dilations_0, groups = var_17710_groups_0, pad = var_17710_pad_0, pad_type = var_17710_pad_type_0, strides = var_17710_strides_0, weight = model_lm_head16_14_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17710_cast_fp16")]; + tensor var_17712_axes_0 = const()[name = string("op_17712_axes_0"), val = tensor([2])]; + tensor var_17712_cast_fp16 = squeeze(axes = var_17712_axes_0, x = var_17710_cast_fp16)[name = string("op_17712_cast_fp16")]; + tensor logits_27_perm_0 = const()[name = string("logits_27_perm_0"), val = tensor([0, 2, 1])]; + string var_17726_pad_type_0 = const()[name = string("op_17726_pad_type_0"), val = string("valid")]; + tensor var_17726_strides_0 = const()[name = string("op_17726_strides_0"), val = tensor([1, 1])]; + tensor var_17726_pad_0 = const()[name = string("op_17726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17726_dilations_0 = const()[name = string("op_17726_dilations_0"), val = tensor([1, 1])]; + int32 var_17726_groups_0 = const()[name = string("op_17726_groups_0"), val = int32(1)]; + tensor model_lm_head16_15_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096164352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110320192))))[name = string("model_lm_head16_15_weight_promoted_to_fp16_palettized")]; + tensor var_17726_cast_fp16 = conv(dilations = var_17726_dilations_0, groups = var_17726_groups_0, pad = var_17726_pad_0, pad_type = var_17726_pad_type_0, strides = var_17726_strides_0, weight = model_lm_head16_15_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17726_cast_fp16")]; + tensor var_17728_axes_0 = const()[name = string("op_17728_axes_0"), val = tensor([2])]; + tensor var_17728_cast_fp16 = squeeze(axes = var_17728_axes_0, x = var_17726_cast_fp16)[name = string("op_17728_cast_fp16")]; + tensor logits_29_perm_0 = const()[name = string("logits_29_perm_0"), val = tensor([0, 2, 1])]; + string var_17742_pad_type_0 = const()[name = string("op_17742_pad_type_0"), val = string("valid")]; + tensor var_17742_strides_0 = const()[name = string("op_17742_strides_0"), val = tensor([1, 1])]; + tensor var_17742_pad_0 = const()[name = string("op_17742_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17742_dilations_0 = const()[name = string("op_17742_dilations_0"), val = tensor([1, 1])]; + int32 var_17742_groups_0 = const()[name = string("op_17742_groups_0"), val = int32(1)]; + tensor model_lm_head16_16_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110844544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1125000384))))[name = string("model_lm_head16_16_weight_promoted_to_fp16_palettized")]; + tensor var_17742_cast_fp16 = conv(dilations = var_17742_dilations_0, groups = var_17742_groups_0, pad = var_17742_pad_0, pad_type = var_17742_pad_type_0, strides = var_17742_strides_0, weight = model_lm_head16_16_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17742_cast_fp16")]; + tensor var_17744_axes_0 = const()[name = string("op_17744_axes_0"), val = tensor([2])]; + tensor var_17744_cast_fp16 = squeeze(axes = var_17744_axes_0, x = var_17742_cast_fp16)[name = string("op_17744_cast_fp16")]; + tensor logits_perm_0 = const()[name = string("logits_perm_0"), val = tensor([0, 2, 1])]; + int32 chunk_argmax_1_axis_0 = const()[name = string("chunk_argmax_1_axis_0"), val = int32(-1)]; + bool chunk_argmax_1_keep_dims_0 = const()[name = string("chunk_argmax_1_keep_dims_0"), val = bool(true)]; + string chunk_argmax_1_output_dtype_0 = const()[name = string("chunk_argmax_1_output_dtype_0"), val = string("int32")]; + tensor logits_1_cast_fp16 = transpose(perm = logits_1_perm_0, x = var_17504_cast_fp16)[name = string("transpose_15")]; + tensor chunk_argmax_1_cast_fp16 = reduce_argmax(axis = chunk_argmax_1_axis_0, keep_dims = chunk_argmax_1_keep_dims_0, output_dtype = chunk_argmax_1_output_dtype_0, x = logits_1_cast_fp16)[name = string("chunk_argmax_1_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = logits_1_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + int32 chunk_argmax_3_axis_0 = const()[name = string("chunk_argmax_3_axis_0"), val = int32(-1)]; + bool chunk_argmax_3_keep_dims_0 = const()[name = string("chunk_argmax_3_keep_dims_0"), val = bool(true)]; + string chunk_argmax_3_output_dtype_0 = const()[name = string("chunk_argmax_3_output_dtype_0"), val = string("int32")]; + tensor logits_3_cast_fp16 = transpose(perm = logits_3_perm_0, x = var_17520_cast_fp16)[name = string("transpose_14")]; + tensor chunk_argmax_3_cast_fp16 = reduce_argmax(axis = chunk_argmax_3_axis_0, keep_dims = chunk_argmax_3_keep_dims_0, output_dtype = chunk_argmax_3_output_dtype_0, x = logits_3_cast_fp16)[name = string("chunk_argmax_3_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = logits_3_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + int32 chunk_argmax_5_axis_0 = const()[name = string("chunk_argmax_5_axis_0"), val = int32(-1)]; + bool chunk_argmax_5_keep_dims_0 = const()[name = string("chunk_argmax_5_keep_dims_0"), val = bool(true)]; + string chunk_argmax_5_output_dtype_0 = const()[name = string("chunk_argmax_5_output_dtype_0"), val = string("int32")]; + tensor logits_5_cast_fp16 = transpose(perm = logits_5_perm_0, x = var_17536_cast_fp16)[name = string("transpose_13")]; + tensor chunk_argmax_5_cast_fp16 = reduce_argmax(axis = chunk_argmax_5_axis_0, keep_dims = chunk_argmax_5_keep_dims_0, output_dtype = chunk_argmax_5_output_dtype_0, x = logits_5_cast_fp16)[name = string("chunk_argmax_5_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = logits_5_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + int32 chunk_argmax_7_axis_0 = const()[name = string("chunk_argmax_7_axis_0"), val = int32(-1)]; + bool chunk_argmax_7_keep_dims_0 = const()[name = string("chunk_argmax_7_keep_dims_0"), val = bool(true)]; + string chunk_argmax_7_output_dtype_0 = const()[name = string("chunk_argmax_7_output_dtype_0"), val = string("int32")]; + tensor logits_7_cast_fp16 = transpose(perm = logits_7_perm_0, x = var_17552_cast_fp16)[name = string("transpose_12")]; + tensor chunk_argmax_7_cast_fp16 = reduce_argmax(axis = chunk_argmax_7_axis_0, keep_dims = chunk_argmax_7_keep_dims_0, output_dtype = chunk_argmax_7_output_dtype_0, x = logits_7_cast_fp16)[name = string("chunk_argmax_7_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = logits_7_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + int32 chunk_argmax_9_axis_0 = const()[name = string("chunk_argmax_9_axis_0"), val = int32(-1)]; + bool chunk_argmax_9_keep_dims_0 = const()[name = string("chunk_argmax_9_keep_dims_0"), val = bool(true)]; + string chunk_argmax_9_output_dtype_0 = const()[name = string("chunk_argmax_9_output_dtype_0"), val = string("int32")]; + tensor logits_9_cast_fp16 = transpose(perm = logits_9_perm_0, x = var_17568_cast_fp16)[name = string("transpose_11")]; + tensor chunk_argmax_9_cast_fp16 = reduce_argmax(axis = chunk_argmax_9_axis_0, keep_dims = chunk_argmax_9_keep_dims_0, output_dtype = chunk_argmax_9_output_dtype_0, x = logits_9_cast_fp16)[name = string("chunk_argmax_9_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = logits_9_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + int32 chunk_argmax_11_axis_0 = const()[name = string("chunk_argmax_11_axis_0"), val = int32(-1)]; + bool chunk_argmax_11_keep_dims_0 = const()[name = string("chunk_argmax_11_keep_dims_0"), val = bool(true)]; + string chunk_argmax_11_output_dtype_0 = const()[name = string("chunk_argmax_11_output_dtype_0"), val = string("int32")]; + tensor logits_11_cast_fp16 = transpose(perm = logits_11_perm_0, x = var_17584_cast_fp16)[name = string("transpose_10")]; + tensor chunk_argmax_11_cast_fp16 = reduce_argmax(axis = chunk_argmax_11_axis_0, keep_dims = chunk_argmax_11_keep_dims_0, output_dtype = chunk_argmax_11_output_dtype_0, x = logits_11_cast_fp16)[name = string("chunk_argmax_11_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = logits_11_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + int32 chunk_argmax_13_axis_0 = const()[name = string("chunk_argmax_13_axis_0"), val = int32(-1)]; + bool chunk_argmax_13_keep_dims_0 = const()[name = string("chunk_argmax_13_keep_dims_0"), val = bool(true)]; + string chunk_argmax_13_output_dtype_0 = const()[name = string("chunk_argmax_13_output_dtype_0"), val = string("int32")]; + tensor logits_13_cast_fp16 = transpose(perm = logits_13_perm_0, x = var_17600_cast_fp16)[name = string("transpose_9")]; + tensor chunk_argmax_13_cast_fp16 = reduce_argmax(axis = chunk_argmax_13_axis_0, keep_dims = chunk_argmax_13_keep_dims_0, output_dtype = chunk_argmax_13_output_dtype_0, x = logits_13_cast_fp16)[name = string("chunk_argmax_13_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = logits_13_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + int32 chunk_argmax_15_axis_0 = const()[name = string("chunk_argmax_15_axis_0"), val = int32(-1)]; + bool chunk_argmax_15_keep_dims_0 = const()[name = string("chunk_argmax_15_keep_dims_0"), val = bool(true)]; + string chunk_argmax_15_output_dtype_0 = const()[name = string("chunk_argmax_15_output_dtype_0"), val = string("int32")]; + tensor logits_15_cast_fp16 = transpose(perm = logits_15_perm_0, x = var_17616_cast_fp16)[name = string("transpose_8")]; + tensor chunk_argmax_15_cast_fp16 = reduce_argmax(axis = chunk_argmax_15_axis_0, keep_dims = chunk_argmax_15_keep_dims_0, output_dtype = chunk_argmax_15_output_dtype_0, x = logits_15_cast_fp16)[name = string("chunk_argmax_15_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = logits_15_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + int32 chunk_argmax_17_axis_0 = const()[name = string("chunk_argmax_17_axis_0"), val = int32(-1)]; + bool chunk_argmax_17_keep_dims_0 = const()[name = string("chunk_argmax_17_keep_dims_0"), val = bool(true)]; + string chunk_argmax_17_output_dtype_0 = const()[name = string("chunk_argmax_17_output_dtype_0"), val = string("int32")]; + tensor logits_17_cast_fp16 = transpose(perm = logits_17_perm_0, x = var_17632_cast_fp16)[name = string("transpose_7")]; + tensor chunk_argmax_17_cast_fp16 = reduce_argmax(axis = chunk_argmax_17_axis_0, keep_dims = chunk_argmax_17_keep_dims_0, output_dtype = chunk_argmax_17_output_dtype_0, x = logits_17_cast_fp16)[name = string("chunk_argmax_17_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = logits_17_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; + int32 chunk_argmax_19_axis_0 = const()[name = string("chunk_argmax_19_axis_0"), val = int32(-1)]; + bool chunk_argmax_19_keep_dims_0 = const()[name = string("chunk_argmax_19_keep_dims_0"), val = bool(true)]; + string chunk_argmax_19_output_dtype_0 = const()[name = string("chunk_argmax_19_output_dtype_0"), val = string("int32")]; + tensor logits_19_cast_fp16 = transpose(perm = logits_19_perm_0, x = var_17648_cast_fp16)[name = string("transpose_6")]; + tensor chunk_argmax_19_cast_fp16 = reduce_argmax(axis = chunk_argmax_19_axis_0, keep_dims = chunk_argmax_19_keep_dims_0, output_dtype = chunk_argmax_19_output_dtype_0, x = logits_19_cast_fp16)[name = string("chunk_argmax_19_cast_fp16")]; + tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; + bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; + tensor reduce_max_9_cast_fp16 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = logits_19_cast_fp16)[name = string("reduce_max_9_cast_fp16")]; + int32 chunk_argmax_21_axis_0 = const()[name = string("chunk_argmax_21_axis_0"), val = int32(-1)]; + bool chunk_argmax_21_keep_dims_0 = const()[name = string("chunk_argmax_21_keep_dims_0"), val = bool(true)]; + string chunk_argmax_21_output_dtype_0 = const()[name = string("chunk_argmax_21_output_dtype_0"), val = string("int32")]; + tensor logits_21_cast_fp16 = transpose(perm = logits_21_perm_0, x = var_17664_cast_fp16)[name = string("transpose_5")]; + tensor chunk_argmax_21_cast_fp16 = reduce_argmax(axis = chunk_argmax_21_axis_0, keep_dims = chunk_argmax_21_keep_dims_0, output_dtype = chunk_argmax_21_output_dtype_0, x = logits_21_cast_fp16)[name = string("chunk_argmax_21_cast_fp16")]; + tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; + bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; + tensor reduce_max_10_cast_fp16 = reduce_max(axes = reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = logits_21_cast_fp16)[name = string("reduce_max_10_cast_fp16")]; + int32 chunk_argmax_23_axis_0 = const()[name = string("chunk_argmax_23_axis_0"), val = int32(-1)]; + bool chunk_argmax_23_keep_dims_0 = const()[name = string("chunk_argmax_23_keep_dims_0"), val = bool(true)]; + string chunk_argmax_23_output_dtype_0 = const()[name = string("chunk_argmax_23_output_dtype_0"), val = string("int32")]; + tensor logits_23_cast_fp16 = transpose(perm = logits_23_perm_0, x = var_17680_cast_fp16)[name = string("transpose_4")]; + tensor chunk_argmax_23_cast_fp16 = reduce_argmax(axis = chunk_argmax_23_axis_0, keep_dims = chunk_argmax_23_keep_dims_0, output_dtype = chunk_argmax_23_output_dtype_0, x = logits_23_cast_fp16)[name = string("chunk_argmax_23_cast_fp16")]; + tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; + bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; + tensor reduce_max_11_cast_fp16 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = logits_23_cast_fp16)[name = string("reduce_max_11_cast_fp16")]; + int32 chunk_argmax_25_axis_0 = const()[name = string("chunk_argmax_25_axis_0"), val = int32(-1)]; + bool chunk_argmax_25_keep_dims_0 = const()[name = string("chunk_argmax_25_keep_dims_0"), val = bool(true)]; + string chunk_argmax_25_output_dtype_0 = const()[name = string("chunk_argmax_25_output_dtype_0"), val = string("int32")]; + tensor logits_25_cast_fp16 = transpose(perm = logits_25_perm_0, x = var_17696_cast_fp16)[name = string("transpose_3")]; + tensor chunk_argmax_25_cast_fp16 = reduce_argmax(axis = chunk_argmax_25_axis_0, keep_dims = chunk_argmax_25_keep_dims_0, output_dtype = chunk_argmax_25_output_dtype_0, x = logits_25_cast_fp16)[name = string("chunk_argmax_25_cast_fp16")]; + tensor reduce_max_12_axes_0 = const()[name = string("reduce_max_12_axes_0"), val = tensor([-1])]; + bool reduce_max_12_keep_dims_0 = const()[name = string("reduce_max_12_keep_dims_0"), val = bool(true)]; + tensor reduce_max_12_cast_fp16 = reduce_max(axes = reduce_max_12_axes_0, keep_dims = reduce_max_12_keep_dims_0, x = logits_25_cast_fp16)[name = string("reduce_max_12_cast_fp16")]; + int32 chunk_argmax_27_axis_0 = const()[name = string("chunk_argmax_27_axis_0"), val = int32(-1)]; + bool chunk_argmax_27_keep_dims_0 = const()[name = string("chunk_argmax_27_keep_dims_0"), val = bool(true)]; + string chunk_argmax_27_output_dtype_0 = const()[name = string("chunk_argmax_27_output_dtype_0"), val = string("int32")]; + tensor logits_27_cast_fp16 = transpose(perm = logits_27_perm_0, x = var_17712_cast_fp16)[name = string("transpose_2")]; + tensor chunk_argmax_27_cast_fp16 = reduce_argmax(axis = chunk_argmax_27_axis_0, keep_dims = chunk_argmax_27_keep_dims_0, output_dtype = chunk_argmax_27_output_dtype_0, x = logits_27_cast_fp16)[name = string("chunk_argmax_27_cast_fp16")]; + tensor reduce_max_13_axes_0 = const()[name = string("reduce_max_13_axes_0"), val = tensor([-1])]; + bool reduce_max_13_keep_dims_0 = const()[name = string("reduce_max_13_keep_dims_0"), val = bool(true)]; + tensor reduce_max_13_cast_fp16 = reduce_max(axes = reduce_max_13_axes_0, keep_dims = reduce_max_13_keep_dims_0, x = logits_27_cast_fp16)[name = string("reduce_max_13_cast_fp16")]; + int32 chunk_argmax_29_axis_0 = const()[name = string("chunk_argmax_29_axis_0"), val = int32(-1)]; + bool chunk_argmax_29_keep_dims_0 = const()[name = string("chunk_argmax_29_keep_dims_0"), val = bool(true)]; + string chunk_argmax_29_output_dtype_0 = const()[name = string("chunk_argmax_29_output_dtype_0"), val = string("int32")]; + tensor logits_29_cast_fp16 = transpose(perm = logits_29_perm_0, x = var_17728_cast_fp16)[name = string("transpose_1")]; + tensor chunk_argmax_29_cast_fp16 = reduce_argmax(axis = chunk_argmax_29_axis_0, keep_dims = chunk_argmax_29_keep_dims_0, output_dtype = chunk_argmax_29_output_dtype_0, x = logits_29_cast_fp16)[name = string("chunk_argmax_29_cast_fp16")]; + tensor reduce_max_14_axes_0 = const()[name = string("reduce_max_14_axes_0"), val = tensor([-1])]; + bool reduce_max_14_keep_dims_0 = const()[name = string("reduce_max_14_keep_dims_0"), val = bool(true)]; + tensor reduce_max_14_cast_fp16 = reduce_max(axes = reduce_max_14_axes_0, keep_dims = reduce_max_14_keep_dims_0, x = logits_29_cast_fp16)[name = string("reduce_max_14_cast_fp16")]; + int32 chunk_argmax_axis_0 = const()[name = string("chunk_argmax_axis_0"), val = int32(-1)]; + bool chunk_argmax_keep_dims_0 = const()[name = string("chunk_argmax_keep_dims_0"), val = bool(true)]; + string chunk_argmax_output_dtype_0 = const()[name = string("chunk_argmax_output_dtype_0"), val = string("int32")]; + tensor logits_cast_fp16 = transpose(perm = logits_perm_0, x = var_17744_cast_fp16)[name = string("transpose_0")]; + tensor chunk_argmax_cast_fp16 = reduce_argmax(axis = chunk_argmax_axis_0, keep_dims = chunk_argmax_keep_dims_0, output_dtype = chunk_argmax_output_dtype_0, x = logits_cast_fp16)[name = string("chunk_argmax_cast_fp16")]; + tensor reduce_max_15_axes_0 = const()[name = string("reduce_max_15_axes_0"), val = tensor([-1])]; + bool reduce_max_15_keep_dims_0 = const()[name = string("reduce_max_15_keep_dims_0"), val = bool(true)]; + tensor reduce_max_15_cast_fp16 = reduce_max(axes = reduce_max_15_axes_0, keep_dims = reduce_max_15_keep_dims_0, x = logits_cast_fp16)[name = string("reduce_max_15_cast_fp16")]; + int32 var_17941 = const()[name = string("op_17941"), val = int32(-1)]; + bool var_17942_interleave_0 = const()[name = string("op_17942_interleave_0"), val = bool(false)]; + tensor var_17942 = concat(axis = var_17941, interleave = var_17942_interleave_0, values = (chunk_argmax_1_cast_fp16, chunk_argmax_3_cast_fp16, chunk_argmax_5_cast_fp16, chunk_argmax_7_cast_fp16, chunk_argmax_9_cast_fp16, chunk_argmax_11_cast_fp16, chunk_argmax_13_cast_fp16, chunk_argmax_15_cast_fp16, chunk_argmax_17_cast_fp16, chunk_argmax_19_cast_fp16, chunk_argmax_21_cast_fp16, chunk_argmax_23_cast_fp16, chunk_argmax_25_cast_fp16, chunk_argmax_27_cast_fp16, chunk_argmax_29_cast_fp16, chunk_argmax_cast_fp16))[name = string("op_17942")]; + tensor var_17944_axes_0 = const()[name = string("op_17944_axes_0"), val = tensor([0])]; + string var_17942_to_uint16_dtype_0 = const()[name = string("op_17942_to_uint16_dtype_0"), val = string("uint16")]; + tensor var_17942_to_uint16 = cast(dtype = var_17942_to_uint16_dtype_0, x = var_17942)[name = string("cast_1")]; + tensor var_17944_cast_uint16 = squeeze(axes = var_17944_axes_0, x = var_17942_to_uint16)[name = string("op_17944_cast_uint16")]; + tensor var_17946_axes_0 = const()[name = string("op_17946_axes_0"), val = tensor([0])]; + tensor var_17946_cast_uint16 = squeeze(axes = var_17946_axes_0, x = var_17944_cast_uint16)[name = string("op_17946_cast_uint16")]; + string var_17946_cast_uint16_to_int32_dtype_0 = const()[name = string("op_17946_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 var_17948 = const()[name = string("op_17948"), val = int32(-1)]; + bool var_17949_interleave_0 = const()[name = string("op_17949_interleave_0"), val = bool(false)]; + tensor var_17949_cast_fp16 = concat(axis = var_17948, interleave = var_17949_interleave_0, values = (reduce_max_0_cast_fp16, reduce_max_1_cast_fp16, reduce_max_2_cast_fp16, reduce_max_3_cast_fp16, reduce_max_4_cast_fp16, reduce_max_5_cast_fp16, reduce_max_6_cast_fp16, reduce_max_7_cast_fp16, reduce_max_8_cast_fp16, reduce_max_9_cast_fp16, reduce_max_10_cast_fp16, reduce_max_11_cast_fp16, reduce_max_12_cast_fp16, reduce_max_13_cast_fp16, reduce_max_14_cast_fp16, reduce_max_15_cast_fp16))[name = string("op_17949_cast_fp16")]; + tensor var_17951_axes_0 = const()[name = string("op_17951_axes_0"), val = tensor([0])]; + tensor var_17951_cast_fp16 = squeeze(axes = var_17951_axes_0, x = var_17949_cast_fp16)[name = string("op_17951_cast_fp16")]; + tensor var_17953_axes_0 = const()[name = string("op_17953_axes_0"), val = tensor([0])]; + tensor argmax_val = squeeze(axes = var_17953_axes_0, x = var_17951_cast_fp16)[name = string("op_17953_cast_fp16")]; + tensor argmax_idx = cast(dtype = var_17946_cast_uint16_to_int32_dtype_0, x = var_17946_cast_uint16)[name = string("cast_0")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (argmax_idx, argmax_val); + func infer_rotate(tensor causal_mask, tensor current_pos, tensor input_ids, state> model_model_kv_cache_global, state> model_model_kv_cache_local, tensor position_ids) { + tensor model_model_embed_tokens_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301990016))))[name = string("model_model_embed_tokens_weight_palettized")]; + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335544512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336429312))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336462144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336683392))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336691648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336912896))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336921152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337805952))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337838784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338060032))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338068288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338289536))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338297792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339182592))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339215424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339436672))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339444928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339666176))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339674432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340559232))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340592064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340813312))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340821568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341042816))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341051072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341935872))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341968704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342189952))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342198208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342419456))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342427712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343312512))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343345344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343566592))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343574848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343796096))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343804352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344689152))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344721984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344943232))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344951488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345172736))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345180992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346065792))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346098624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346319872))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346328128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346549376))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346557632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347442432))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347475264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347696512))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347926016))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347934272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348819072))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348851904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349073152))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349081408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349302656))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349310912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350195712))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350228544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350449792))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350458048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350679296))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350687552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351572352))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351605184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351826432))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351834688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352055936))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352064192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352948992))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352981824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353203072))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353211328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353432576))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353440832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354325632))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354358464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354579712))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354587968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354809216))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354817472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355702272))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355735104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355956352))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355964608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356185856))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356194112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357078912))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357111744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357332992))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357341248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357562496))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357570752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358455552))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358488384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358709632))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358717888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358939136))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358947392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359832192))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359865024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360086272))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360094528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360315776))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360324032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361208832))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361241664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361462912))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361471168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361692416))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361700672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362585472))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362618304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362839552))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362847808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363069056))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363077312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363962112))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363994944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364216192))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364224448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364445696))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364453952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365338752))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365371584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365592832))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365601088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365822336))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365830592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366715392))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366748224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366969472))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366977728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367198976))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367207232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368092032))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368124864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368346112))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368354368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368575616))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368583872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369468672))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369501504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369722752))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369731008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369952256))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369960512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370845312))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370878144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371099392))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371107648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371328896))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; + int32 hidden_states_1_batch_dims_0 = const()[name = string("hidden_states_1_batch_dims_0"), val = int32(0)]; + bool hidden_states_1_validate_indices_0 = const()[name = string("hidden_states_1_validate_indices_0"), val = bool(false)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(262144)]; + tensor add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; + tensor greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; + int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(262144)]; + tensor add_0_1 = add(x = select_0, y = slice_by_index_0_1)[name = string("add_0_1")]; + tensor select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; + int32 hidden_states_1_axis_0 = const()[name = string("hidden_states_1_axis_0"), val = int32(0)]; + tensor hidden_states_1 = gather(axis = hidden_states_1_axis_0, batch_dims = hidden_states_1_batch_dims_0, indices = select_0_1, validate_indices = hidden_states_1_validate_indices_0, x = model_model_embed_tokens_weight_palettized)[name = string("hidden_states_1")]; + fp16 var_1647_to_fp16 = const()[name = string("op_1647_to_fp16"), val = fp16(0x1.0f8p+5)]; + tensor hidden_states_3_cast_fp16 = mul(x = hidden_states_1, y = var_1647_to_fp16)[name = string("hidden_states_3_cast_fp16")]; + int32 var_1662_axis_0 = const()[name = string("op_1662_axis_0"), val = int32(1)]; + int32 var_1662_batch_dims_0 = const()[name = string("op_1662_batch_dims_0"), val = int32(0)]; + bool var_1662_validate_indices_0 = const()[name = string("op_1662_validate_indices_0"), val = bool(false)]; + tensor var_1654_to_fp16 = const()[name = string("op_1654_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371337152)))]; + string current_pos_to_uint16_dtype_0 = const()[name = string("current_pos_to_uint16_dtype_0"), val = string("uint16")]; + tensor current_pos_to_uint16 = cast(dtype = current_pos_to_uint16_dtype_0, x = current_pos)[name = string("cast_2")]; + tensor var_1662_cast_fp16_cast_uint16 = gather(axis = var_1662_axis_0, batch_dims = var_1662_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_1662_validate_indices_0, x = var_1654_to_fp16)[name = string("op_1662_cast_fp16_cast_uint16")]; + tensor var_1667 = const()[name = string("op_1667"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_1667, x = var_1662_cast_fp16_cast_uint16)[name = string("sin_1_cast_fp16")]; + int32 var_1677_axis_0 = const()[name = string("op_1677_axis_0"), val = int32(1)]; + int32 var_1677_batch_dims_0 = const()[name = string("op_1677_batch_dims_0"), val = int32(0)]; + bool var_1677_validate_indices_0 = const()[name = string("op_1677_validate_indices_0"), val = bool(false)]; + tensor var_1669_to_fp16 = const()[name = string("op_1669_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375531520)))]; + tensor var_1677_cast_fp16_cast_uint16 = gather(axis = var_1677_axis_0, batch_dims = var_1677_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_1677_validate_indices_0, x = var_1669_to_fp16)[name = string("op_1677_cast_fp16_cast_uint16")]; + tensor var_1682 = const()[name = string("op_1682"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_1682, x = var_1677_cast_fp16_cast_uint16)[name = string("cos_1_cast_fp16")]; + int32 var_1703 = const()[name = string("op_1703"), val = int32(-1)]; + fp16 const_0_promoted = const()[name = string("const_0_promoted"), val = fp16(-0x1p+0)]; + tensor var_1705 = mul(x = hidden_states_3_cast_fp16, y = const_0_promoted)[name = string("op_1705")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1 = concat(axis = var_1703, interleave = input_1_interleave_0, values = (hidden_states_3_cast_fp16, var_1705))[name = string("input_1")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_1700_to_fp16 = const()[name = string("op_1700_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1700_to_fp16, x = input_1)[name = string("normed_1_cast_fp16")]; + tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_3 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3")]; + tensor var_1719_to_fp16 = const()[name = string("op_1719_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379725888)))]; + tensor hidden_states_7_cast_fp16 = mul(x = normed_3, y = var_1719_to_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_1724 = const()[name = string("op_1724"), val = tensor([0, 2, 1])]; + tensor var_1727_axes_0 = const()[name = string("op_1727_axes_0"), val = tensor([2])]; + tensor var_1725_cast_fp16 = transpose(perm = var_1724, x = hidden_states_7_cast_fp16)[name = string("transpose_172")]; + tensor var_1727_cast_fp16 = expand_dims(axes = var_1727_axes_0, x = var_1725_cast_fp16)[name = string("op_1727_cast_fp16")]; + string var_1743_pad_type_0 = const()[name = string("op_1743_pad_type_0"), val = string("valid")]; + tensor var_1743_strides_0 = const()[name = string("op_1743_strides_0"), val = tensor([1, 1])]; + tensor var_1743_pad_0 = const()[name = string("op_1743_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1743_dilations_0 = const()[name = string("op_1743_dilations_0"), val = tensor([1, 1])]; + int32 var_1743_groups_0 = const()[name = string("op_1743_groups_0"), val = int32(1)]; + tensor var_1743 = conv(dilations = var_1743_dilations_0, groups = var_1743_groups_0, pad = var_1743_pad_0, pad_type = var_1743_pad_type_0, strides = var_1743_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1727_cast_fp16)[name = string("op_1743")]; + tensor var_1748 = const()[name = string("op_1748"), val = tensor([1, 4, 1, 256])]; + tensor var_1749 = reshape(shape = var_1748, x = var_1743)[name = string("op_1749")]; + string var_1765_pad_type_0 = const()[name = string("op_1765_pad_type_0"), val = string("valid")]; + tensor var_1765_strides_0 = const()[name = string("op_1765_strides_0"), val = tensor([1, 1])]; + tensor var_1765_pad_0 = const()[name = string("op_1765_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1765_dilations_0 = const()[name = string("op_1765_dilations_0"), val = tensor([1, 1])]; + int32 var_1765_groups_0 = const()[name = string("op_1765_groups_0"), val = int32(1)]; + tensor var_1765 = conv(dilations = var_1765_dilations_0, groups = var_1765_groups_0, pad = var_1765_pad_0, pad_type = var_1765_pad_type_0, strides = var_1765_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1727_cast_fp16)[name = string("op_1765")]; + tensor var_1770 = const()[name = string("op_1770"), val = tensor([1, 1, 1, 256])]; + tensor var_1771 = reshape(shape = var_1770, x = var_1765)[name = string("op_1771")]; + string var_1787_pad_type_0 = const()[name = string("op_1787_pad_type_0"), val = string("valid")]; + tensor var_1787_strides_0 = const()[name = string("op_1787_strides_0"), val = tensor([1, 1])]; + tensor var_1787_pad_0 = const()[name = string("op_1787_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1787_dilations_0 = const()[name = string("op_1787_dilations_0"), val = tensor([1, 1])]; + int32 var_1787_groups_0 = const()[name = string("op_1787_groups_0"), val = int32(1)]; + tensor var_1787 = conv(dilations = var_1787_dilations_0, groups = var_1787_groups_0, pad = var_1787_pad_0, pad_type = var_1787_pad_type_0, strides = var_1787_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1727_cast_fp16)[name = string("op_1787")]; + tensor var_1792 = const()[name = string("op_1792"), val = tensor([1, 1, 1, 256])]; + tensor var_1793 = reshape(shape = var_1792, x = var_1787)[name = string("op_1793")]; + int32 var_1808 = const()[name = string("op_1808"), val = int32(-1)]; + fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; + tensor var_1810 = mul(x = var_1749, y = const_4_promoted)[name = string("op_1810")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_1808, interleave = input_5_interleave_0, values = (var_1749, var_1810))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_1805_to_fp16 = const()[name = string("op_1805_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1805_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; + tensor var_1824_to_fp16 = const()[name = string("op_1824_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379728256)))]; + tensor q_1_cast_fp16 = mul(x = normed_7, y = var_1824_to_fp16)[name = string("q_1_cast_fp16")]; + int32 var_1835 = const()[name = string("op_1835"), val = int32(-1)]; + fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; + tensor var_1837 = mul(x = var_1771, y = const_8_promoted)[name = string("op_1837")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_1835, interleave = input_7_interleave_0, values = (var_1771, var_1837))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_1832_to_fp16 = const()[name = string("op_1832_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1832_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; + tensor var_1851_to_fp16 = const()[name = string("op_1851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379728832)))]; + tensor k_1_cast_fp16 = mul(x = normed_11, y = var_1851_to_fp16)[name = string("k_1_cast_fp16")]; + tensor var_1853_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1853_cast_fp16")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1874_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1874_cast_fp16")]; + int32 var_1876 = const()[name = string("op_1876"), val = int32(-1)]; + bool var_1877_interleave_0 = const()[name = string("op_1877_interleave_0"), val = bool(false)]; + tensor var_1877_cast_fp16 = concat(axis = var_1876, interleave = var_1877_interleave_0, values = (var_1874_cast_fp16, x1_1_cast_fp16))[name = string("op_1877_cast_fp16")]; + tensor var_1878_cast_fp16 = mul(x = var_1877_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1878_cast_fp16")]; + tensor query_states_1_cast_fp16 = add(x = var_1853_cast_fp16, y = var_1878_cast_fp16)[name = string("query_states_1_cast_fp16")]; + tensor var_1881_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1881_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1902_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1902_cast_fp16")]; + int32 var_1904 = const()[name = string("op_1904"), val = int32(-1)]; + bool var_1905_interleave_0 = const()[name = string("op_1905_interleave_0"), val = bool(false)]; + tensor var_1905_cast_fp16 = concat(axis = var_1904, interleave = var_1905_interleave_0, values = (var_1902_cast_fp16, x1_3_cast_fp16))[name = string("op_1905_cast_fp16")]; + tensor var_1906_cast_fp16 = mul(x = var_1905_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1906_cast_fp16")]; + tensor key_states_1_cast_fp16 = add(x = var_1881_cast_fp16, y = var_1906_cast_fp16)[name = string("key_states_1_cast_fp16")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_local)[name = string("read_state_0")]; + tensor key_slice_1_begin_0 = const()[name = string("key_slice_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor key_slice_1_end_0 = const()[name = string("key_slice_1_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_slice_1_end_mask_0 = const()[name = string("key_slice_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_1_cast_fp16 = slice_by_index(begin = key_slice_1_begin_0, end = key_slice_1_end_0, end_mask = key_slice_1_end_mask_0, x = read_state_0)[name = string("key_slice_1_cast_fp16")]; + tensor key_tail_1_begin_0 = const()[name = string("key_tail_1_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_1_end_0 = const()[name = string("key_tail_1_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_1_cast_fp16 = slice_by_index(begin = key_tail_1_begin_0, end = key_tail_1_end_0, x = key_slice_1_cast_fp16)[name = string("key_tail_1_cast_fp16")]; + int32 var_1919 = const()[name = string("op_1919"), val = int32(2)]; + bool shifted_key_1_interleave_0 = const()[name = string("shifted_key_1_interleave_0"), val = bool(false)]; + tensor shifted_key_1_cast_fp16 = concat(axis = var_1919, interleave = shifted_key_1_interleave_0, values = (key_tail_1_cast_fp16, key_states_1_cast_fp16))[name = string("shifted_key_1_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_1_stride_0, update = shifted_key_1_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_52_write_state")]; + tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_52")]; + tensor value_slice_1_begin_0 = const()[name = string("value_slice_1_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor value_slice_1_end_0 = const()[name = string("value_slice_1_end_0"), val = tensor([23, 1, 512, 256])]; + tensor value_slice_1_end_mask_0 = const()[name = string("value_slice_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_1_cast_fp16 = slice_by_index(begin = value_slice_1_begin_0, end = value_slice_1_end_0, end_mask = value_slice_1_end_mask_0, x = coreml_update_state_52)[name = string("value_slice_1_cast_fp16")]; + tensor value_tail_1_begin_0 = const()[name = string("value_tail_1_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_1_end_0 = const()[name = string("value_tail_1_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_1_cast_fp16 = slice_by_index(begin = value_tail_1_begin_0, end = value_tail_1_end_0, x = value_slice_1_cast_fp16)[name = string("value_tail_1_cast_fp16")]; + int32 var_1953 = const()[name = string("op_1953"), val = int32(2)]; + bool shifted_value_1_interleave_0 = const()[name = string("shifted_value_1_interleave_0"), val = bool(false)]; + tensor shifted_value_1_cast_fp16 = concat(axis = var_1953, interleave = shifted_value_1_interleave_0, values = (value_tail_1_cast_fp16, var_1793))[name = string("shifted_value_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([22, 0, 0, 0])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([23, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_2_stride_0, update = shifted_value_1_cast_fp16, x = coreml_update_state_52)[name = string("model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_53_write_state")]; + tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_53")]; + tensor var_1981_begin_0 = const()[name = string("op_1981_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1981_end_0 = const()[name = string("op_1981_end_0"), val = tensor([1, 1, 512, 256])]; + tensor var_1981_end_mask_0 = const()[name = string("op_1981_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1981_cast_fp16 = slice_by_index(begin = var_1981_begin_0, end = var_1981_end_0, end_mask = var_1981_end_mask_0, x = coreml_update_state_53)[name = string("op_1981_cast_fp16")]; + tensor var_1988_begin_0 = const()[name = string("op_1988_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_1988_end_0 = const()[name = string("op_1988_end_0"), val = tensor([23, 1, 512, 256])]; + tensor var_1988_end_mask_0 = const()[name = string("op_1988_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1988_cast_fp16 = slice_by_index(begin = var_1988_begin_0, end = var_1988_end_0, end_mask = var_1988_end_mask_0, x = coreml_update_state_53)[name = string("op_1988_cast_fp16")]; + tensor var_2025 = const()[name = string("op_2025"), val = tensor([1, 4, 1, 1])]; + tensor x_5_cast_fp16 = tile(reps = var_2025, x = var_1981_cast_fp16)[name = string("x_5_cast_fp16")]; + tensor var_2045 = const()[name = string("op_2045"), val = tensor([1, 4, 1, 1])]; + tensor x_11_cast_fp16 = tile(reps = var_2045, x = var_1988_cast_fp16)[name = string("x_11_cast_fp16")]; + bool var_2072_transpose_x_1 = const()[name = string("op_2072_transpose_x_1"), val = bool(false)]; + bool var_2072_transpose_y_1 = const()[name = string("op_2072_transpose_y_1"), val = bool(true)]; + tensor var_2072 = matmul(transpose_x = var_2072_transpose_x_1, transpose_y = var_2072_transpose_y_1, x = query_states_1_cast_fp16, y = x_5_cast_fp16)[name = string("op_2072")]; + fp16 var_2073_to_fp16 = const()[name = string("op_2073_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_2072, y = var_2073_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor var_2105_begin_0 = const()[name = string("op_2105_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2105_end_0 = const()[name = string("op_2105_end_0"), val = tensor([1, 1, 1, 512])]; + tensor var_2105_end_mask_0 = const()[name = string("op_2105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2105 = slice_by_index(begin = var_2105_begin_0, end = var_2105_end_0, end_mask = var_2105_end_mask_0, x = causal_mask)[name = string("op_2105")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = var_2105)[name = string("attn_weights_3_cast_fp16")]; + int32 var_2108 = const()[name = string("op_2108"), val = int32(-1)]; + tensor attn_weights_5_cast_fp16 = softmax(axis = var_2108, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_5_cast_fp16, y = x_11_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_2119_perm_0 = const()[name = string("op_2119_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2123 = const()[name = string("op_2123"), val = tensor([1, 1, 1024])]; + tensor var_2119_cast_fp16 = transpose(perm = var_2119_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_171")]; + tensor attn_output_5_cast_fp16 = reshape(shape = var_2123, x = var_2119_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_2128 = const()[name = string("op_2128"), val = tensor([0, 2, 1])]; + string var_2144_pad_type_0 = const()[name = string("op_2144_pad_type_0"), val = string("valid")]; + int32 var_2144_groups_0 = const()[name = string("op_2144_groups_0"), val = int32(1)]; + tensor var_2144_strides_0 = const()[name = string("op_2144_strides_0"), val = tensor([1])]; + tensor var_2144_pad_0 = const()[name = string("op_2144_pad_0"), val = tensor([0, 0])]; + tensor var_2144_dilations_0 = const()[name = string("op_2144_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379729408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380614208))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2129_cast_fp16 = transpose(perm = var_2128, x = attn_output_5_cast_fp16)[name = string("transpose_170")]; + tensor var_2144_cast_fp16 = conv(dilations = var_2144_dilations_0, groups = var_2144_groups_0, pad = var_2144_pad_0, pad_type = var_2144_pad_type_0, strides = var_2144_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_2129_cast_fp16)[name = string("op_2144_cast_fp16")]; + tensor var_2148 = const()[name = string("op_2148"), val = tensor([0, 2, 1])]; + int32 var_2159 = const()[name = string("op_2159"), val = int32(-1)]; + fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_13_cast_fp16 = transpose(perm = var_2148, x = var_2144_cast_fp16)[name = string("transpose_169")]; + tensor var_2161_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2161_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_2159, interleave = input_11_interleave_0, values = (hidden_states_13_cast_fp16, var_2161_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_2156_to_fp16 = const()[name = string("op_2156_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2156_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; + tensor var_2175_to_fp16 = const()[name = string("op_2175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380651136)))]; + tensor attn_output_9_cast_fp16 = mul(x = normed_15_cast_fp16, y = var_2175_to_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = attn_output_9_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + int32 var_2188 = const()[name = string("op_2188"), val = int32(-1)]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2190_cast_fp16 = mul(x = hidden_states_15_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2190_cast_fp16")]; + bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; + tensor input_13_cast_fp16 = concat(axis = var_2188, interleave = input_13_interleave_0, values = (hidden_states_15_cast_fp16, var_2190_cast_fp16))[name = string("input_13_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_2185_to_fp16 = const()[name = string("op_2185_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2185_to_fp16, x = input_13_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; + tensor var_2204_to_fp16 = const()[name = string("op_2204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380653504)))]; + tensor x_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = var_2204_to_fp16)[name = string("x_13_cast_fp16")]; + tensor var_2216 = const()[name = string("op_2216"), val = tensor([0, 2, 1])]; + tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; + tensor var_2217_cast_fp16 = transpose(perm = var_2216, x = x_13_cast_fp16)[name = string("transpose_168")]; + tensor input_15_cast_fp16 = expand_dims(axes = input_15_axes_0, x = var_2217_cast_fp16)[name = string("input_15_cast_fp16")]; + string x_15_pad_type_0 = const()[name = string("x_15_pad_type_0"), val = string("valid")]; + tensor x_15_strides_0 = const()[name = string("x_15_strides_0"), val = tensor([1, 1])]; + tensor x_15_pad_0 = const()[name = string("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_15_dilations_0 = const()[name = string("x_15_dilations_0"), val = tensor([1, 1])]; + int32 x_15_groups_0 = const()[name = string("x_15_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380655872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386627904))))[name = string("model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_15_cast_fp16 = conv(dilations = x_15_dilations_0, groups = x_15_groups_0, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = x_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("x_15_cast_fp16")]; + string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; + tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; + tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; + int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386849152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392821184))))[name = string("model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_1_cast_fp16 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("b_1_cast_fp16")]; + string var_2242_mode_0 = const()[name = string("op_2242_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_2242_cast_fp16 = gelu(mode = var_2242_mode_0, x = x_15_cast_fp16)[name = string("op_2242_cast_fp16")]; + tensor input_17_cast_fp16 = mul(x = var_2242_cast_fp16, y = b_1_cast_fp16)[name = string("input_17_cast_fp16")]; + string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; + tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; + tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; + int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393042432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399014464))))[name = string("model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_1_cast_fp16 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_17_cast_fp16)[name = string("e_1_cast_fp16")]; + tensor var_2250_axes_0 = const()[name = string("op_2250_axes_0"), val = tensor([2])]; + tensor var_2250_cast_fp16 = squeeze(axes = var_2250_axes_0, x = e_1_cast_fp16)[name = string("op_2250_cast_fp16")]; + tensor var_2251 = const()[name = string("op_2251"), val = tensor([0, 2, 1])]; + int32 var_2262 = const()[name = string("op_2262"), val = int32(-1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_17_cast_fp16 = transpose(perm = var_2251, x = var_2250_cast_fp16)[name = string("transpose_167")]; + tensor var_2264_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2264_cast_fp16")]; + bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; + tensor input_19_cast_fp16 = concat(axis = var_2262, interleave = input_19_interleave_0, values = (hidden_states_17_cast_fp16, var_2264_cast_fp16))[name = string("input_19_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_2259_to_fp16 = const()[name = string("op_2259_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2259_to_fp16, x = input_19_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_23_cast_fp16 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23_cast_fp16")]; + tensor var_2278_to_fp16 = const()[name = string("op_2278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399051392)))]; + tensor hidden_states_19_cast_fp16 = mul(x = normed_23_cast_fp16, y = var_2278_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + int32 var_2329 = const()[name = string("op_2329"), val = int32(-1)]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2331_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2331_cast_fp16")]; + bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; + tensor input_21_cast_fp16 = concat(axis = var_2329, interleave = input_21_interleave_0, values = (hidden_states_21_cast_fp16, var_2331_cast_fp16))[name = string("input_21_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_2326_to_fp16 = const()[name = string("op_2326_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2326_to_fp16, x = input_21_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_27_cast_fp16 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27_cast_fp16")]; + tensor var_2345_to_fp16 = const()[name = string("op_2345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399053760)))]; + tensor hidden_states_23_cast_fp16 = mul(x = normed_27_cast_fp16, y = var_2345_to_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor var_2350 = const()[name = string("op_2350"), val = tensor([0, 2, 1])]; + tensor var_2353_axes_0 = const()[name = string("op_2353_axes_0"), val = tensor([2])]; + tensor var_2351_cast_fp16 = transpose(perm = var_2350, x = hidden_states_23_cast_fp16)[name = string("transpose_166")]; + tensor var_2353_cast_fp16 = expand_dims(axes = var_2353_axes_0, x = var_2351_cast_fp16)[name = string("op_2353_cast_fp16")]; + string var_2369_pad_type_0 = const()[name = string("op_2369_pad_type_0"), val = string("valid")]; + tensor var_2369_strides_0 = const()[name = string("op_2369_strides_0"), val = tensor([1, 1])]; + tensor var_2369_pad_0 = const()[name = string("op_2369_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2369_dilations_0 = const()[name = string("op_2369_dilations_0"), val = tensor([1, 1])]; + int32 var_2369_groups_0 = const()[name = string("op_2369_groups_0"), val = int32(1)]; + tensor var_2369 = conv(dilations = var_2369_dilations_0, groups = var_2369_groups_0, pad = var_2369_pad_0, pad_type = var_2369_pad_type_0, strides = var_2369_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2353_cast_fp16)[name = string("op_2369")]; + tensor var_2374 = const()[name = string("op_2374"), val = tensor([1, 4, 1, 256])]; + tensor var_2375 = reshape(shape = var_2374, x = var_2369)[name = string("op_2375")]; + string var_2391_pad_type_0 = const()[name = string("op_2391_pad_type_0"), val = string("valid")]; + tensor var_2391_strides_0 = const()[name = string("op_2391_strides_0"), val = tensor([1, 1])]; + tensor var_2391_pad_0 = const()[name = string("op_2391_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2391_dilations_0 = const()[name = string("op_2391_dilations_0"), val = tensor([1, 1])]; + int32 var_2391_groups_0 = const()[name = string("op_2391_groups_0"), val = int32(1)]; + tensor var_2391 = conv(dilations = var_2391_dilations_0, groups = var_2391_groups_0, pad = var_2391_pad_0, pad_type = var_2391_pad_type_0, strides = var_2391_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2353_cast_fp16)[name = string("op_2391")]; + tensor var_2396 = const()[name = string("op_2396"), val = tensor([1, 1, 1, 256])]; + tensor var_2397 = reshape(shape = var_2396, x = var_2391)[name = string("op_2397")]; + string var_2413_pad_type_0 = const()[name = string("op_2413_pad_type_0"), val = string("valid")]; + tensor var_2413_strides_0 = const()[name = string("op_2413_strides_0"), val = tensor([1, 1])]; + tensor var_2413_pad_0 = const()[name = string("op_2413_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2413_dilations_0 = const()[name = string("op_2413_dilations_0"), val = tensor([1, 1])]; + int32 var_2413_groups_0 = const()[name = string("op_2413_groups_0"), val = int32(1)]; + tensor var_2413 = conv(dilations = var_2413_dilations_0, groups = var_2413_groups_0, pad = var_2413_pad_0, pad_type = var_2413_pad_type_0, strides = var_2413_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2353_cast_fp16)[name = string("op_2413")]; + tensor var_2418 = const()[name = string("op_2418"), val = tensor([1, 1, 1, 256])]; + tensor var_2419 = reshape(shape = var_2418, x = var_2413)[name = string("op_2419")]; + int32 var_2434 = const()[name = string("op_2434"), val = int32(-1)]; + fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; + tensor var_2436 = mul(x = var_2375, y = const_42_promoted)[name = string("op_2436")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25 = concat(axis = var_2434, interleave = input_25_interleave_0, values = (var_2375, var_2436))[name = string("input_25")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_2431_to_fp16 = const()[name = string("op_2431_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2431_to_fp16, x = input_25)[name = string("normed_29_cast_fp16")]; + tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_31 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31")]; + tensor var_2450_to_fp16 = const()[name = string("op_2450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399056128)))]; + tensor q_3_cast_fp16 = mul(x = normed_31, y = var_2450_to_fp16)[name = string("q_3_cast_fp16")]; + int32 var_2461 = const()[name = string("op_2461"), val = int32(-1)]; + fp16 const_46_promoted = const()[name = string("const_46_promoted"), val = fp16(-0x1p+0)]; + tensor var_2463 = mul(x = var_2397, y = const_46_promoted)[name = string("op_2463")]; + bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; + tensor input_27 = concat(axis = var_2461, interleave = input_27_interleave_0, values = (var_2397, var_2463))[name = string("input_27")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_2458_to_fp16 = const()[name = string("op_2458_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2458_to_fp16, x = input_27)[name = string("normed_33_cast_fp16")]; + tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_35 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35")]; + tensor var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399056704)))]; + tensor k_3_cast_fp16 = mul(x = normed_35, y = var_2477_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_2479_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2479_cast_fp16")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; + fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2500_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2500_cast_fp16")]; + int32 var_2502 = const()[name = string("op_2502"), val = int32(-1)]; + bool var_2503_interleave_0 = const()[name = string("op_2503_interleave_0"), val = bool(false)]; + tensor var_2503_cast_fp16 = concat(axis = var_2502, interleave = var_2503_interleave_0, values = (var_2500_cast_fp16, x1_5_cast_fp16))[name = string("op_2503_cast_fp16")]; + tensor var_2504_cast_fp16 = mul(x = var_2503_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2504_cast_fp16")]; + tensor query_states_5_cast_fp16 = add(x = var_2479_cast_fp16, y = var_2504_cast_fp16)[name = string("query_states_5_cast_fp16")]; + tensor var_2507_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2507_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; + fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2528_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_2528_cast_fp16")]; + int32 var_2530 = const()[name = string("op_2530"), val = int32(-1)]; + bool var_2531_interleave_0 = const()[name = string("op_2531_interleave_0"), val = bool(false)]; + tensor var_2531_cast_fp16 = concat(axis = var_2530, interleave = var_2531_interleave_0, values = (var_2528_cast_fp16, x1_7_cast_fp16))[name = string("op_2531_cast_fp16")]; + tensor var_2532_cast_fp16 = mul(x = var_2531_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2532_cast_fp16")]; + tensor key_states_5_cast_fp16 = add(x = var_2507_cast_fp16, y = var_2532_cast_fp16)[name = string("key_states_5_cast_fp16")]; + tensor key_slice_3_begin_0 = const()[name = string("key_slice_3_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor key_slice_3_end_0 = const()[name = string("key_slice_3_end_0"), val = tensor([2, 1, 512, 256])]; + tensor key_slice_3_end_mask_0 = const()[name = string("key_slice_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_3_cast_fp16 = slice_by_index(begin = key_slice_3_begin_0, end = key_slice_3_end_0, end_mask = key_slice_3_end_mask_0, x = coreml_update_state_53)[name = string("key_slice_3_cast_fp16")]; + tensor key_tail_3_begin_0 = const()[name = string("key_tail_3_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_3_end_0 = const()[name = string("key_tail_3_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_3_cast_fp16 = slice_by_index(begin = key_tail_3_begin_0, end = key_tail_3_end_0, x = key_slice_3_cast_fp16)[name = string("key_tail_3_cast_fp16")]; + int32 var_2545 = const()[name = string("op_2545"), val = int32(2)]; + bool shifted_key_3_interleave_0 = const()[name = string("shifted_key_3_interleave_0"), val = bool(false)]; + tensor shifted_key_3_cast_fp16 = concat(axis = var_2545, interleave = shifted_key_3_interleave_0, values = (key_tail_3_cast_fp16, key_states_5_cast_fp16))[name = string("shifted_key_3_cast_fp16")]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 0, 0, 0])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([2, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_4, begin_mask = model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0, end = concat_5, end_mask = model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_3_stride_0, update = shifted_key_3_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_54_write_state")]; + tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_54")]; + tensor value_slice_3_begin_0 = const()[name = string("value_slice_3_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor value_slice_3_end_0 = const()[name = string("value_slice_3_end_0"), val = tensor([24, 1, 512, 256])]; + tensor value_slice_3_end_mask_0 = const()[name = string("value_slice_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_3_cast_fp16 = slice_by_index(begin = value_slice_3_begin_0, end = value_slice_3_end_0, end_mask = value_slice_3_end_mask_0, x = coreml_update_state_54)[name = string("value_slice_3_cast_fp16")]; + tensor value_tail_3_begin_0 = const()[name = string("value_tail_3_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_3_end_0 = const()[name = string("value_tail_3_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_3_cast_fp16 = slice_by_index(begin = value_tail_3_begin_0, end = value_tail_3_end_0, x = value_slice_3_cast_fp16)[name = string("value_tail_3_cast_fp16")]; + int32 var_2579 = const()[name = string("op_2579"), val = int32(2)]; + bool shifted_value_3_interleave_0 = const()[name = string("shifted_value_3_interleave_0"), val = bool(false)]; + tensor shifted_value_3_cast_fp16 = concat(axis = var_2579, interleave = shifted_value_3_interleave_0, values = (value_tail_3_cast_fp16, var_2419))[name = string("shifted_value_3_cast_fp16")]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([23, 0, 0, 0])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([24, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_4_stride_0, update = shifted_value_3_cast_fp16, x = coreml_update_state_54)[name = string("model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_55_write_state")]; + tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_55")]; + tensor var_2607_begin_0 = const()[name = string("op_2607_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_2607_end_0 = const()[name = string("op_2607_end_0"), val = tensor([2, 1, 512, 256])]; + tensor var_2607_end_mask_0 = const()[name = string("op_2607_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2607_cast_fp16 = slice_by_index(begin = var_2607_begin_0, end = var_2607_end_0, end_mask = var_2607_end_mask_0, x = coreml_update_state_55)[name = string("op_2607_cast_fp16")]; + tensor var_2614_begin_0 = const()[name = string("op_2614_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_2614_end_0 = const()[name = string("op_2614_end_0"), val = tensor([24, 1, 512, 256])]; + tensor var_2614_end_mask_0 = const()[name = string("op_2614_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2614_cast_fp16 = slice_by_index(begin = var_2614_begin_0, end = var_2614_end_0, end_mask = var_2614_end_mask_0, x = coreml_update_state_55)[name = string("op_2614_cast_fp16")]; + tensor var_2651 = const()[name = string("op_2651"), val = tensor([1, 4, 1, 1])]; + tensor x_21_cast_fp16 = tile(reps = var_2651, x = var_2607_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor var_2671 = const()[name = string("op_2671"), val = tensor([1, 4, 1, 1])]; + tensor x_27_cast_fp16 = tile(reps = var_2671, x = var_2614_cast_fp16)[name = string("x_27_cast_fp16")]; + bool var_2698_transpose_x_1 = const()[name = string("op_2698_transpose_x_1"), val = bool(false)]; + bool var_2698_transpose_y_1 = const()[name = string("op_2698_transpose_y_1"), val = bool(true)]; + tensor var_2698 = matmul(transpose_x = var_2698_transpose_x_1, transpose_y = var_2698_transpose_y_1, x = query_states_5_cast_fp16, y = x_21_cast_fp16)[name = string("op_2698")]; + fp16 var_2699_to_fp16 = const()[name = string("op_2699_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_7_cast_fp16 = mul(x = var_2698, y = var_2699_to_fp16)[name = string("attn_weights_7_cast_fp16")]; + tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = var_2105)[name = string("attn_weights_9_cast_fp16")]; + int32 var_2734 = const()[name = string("op_2734"), val = int32(-1)]; + tensor attn_weights_11_cast_fp16 = softmax(axis = var_2734, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_11_transpose_x_0 = const()[name = string("attn_output_11_transpose_x_0"), val = bool(false)]; + bool attn_output_11_transpose_y_0 = const()[name = string("attn_output_11_transpose_y_0"), val = bool(false)]; + tensor attn_output_11_cast_fp16 = matmul(transpose_x = attn_output_11_transpose_x_0, transpose_y = attn_output_11_transpose_y_0, x = attn_weights_11_cast_fp16, y = x_27_cast_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor var_2745_perm_0 = const()[name = string("op_2745_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2749 = const()[name = string("op_2749"), val = tensor([1, 1, 1024])]; + tensor var_2745_cast_fp16 = transpose(perm = var_2745_perm_0, x = attn_output_11_cast_fp16)[name = string("transpose_165")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2749, x = var_2745_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2754 = const()[name = string("op_2754"), val = tensor([0, 2, 1])]; + string var_2770_pad_type_0 = const()[name = string("op_2770_pad_type_0"), val = string("valid")]; + int32 var_2770_groups_0 = const()[name = string("op_2770_groups_0"), val = int32(1)]; + tensor var_2770_strides_0 = const()[name = string("op_2770_strides_0"), val = tensor([1])]; + tensor var_2770_pad_0 = const()[name = string("op_2770_pad_0"), val = tensor([0, 0])]; + tensor var_2770_dilations_0 = const()[name = string("op_2770_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399057280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399942080))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2755_cast_fp16 = transpose(perm = var_2754, x = attn_output_15_cast_fp16)[name = string("transpose_164")]; + tensor var_2770_cast_fp16 = conv(dilations = var_2770_dilations_0, groups = var_2770_groups_0, pad = var_2770_pad_0, pad_type = var_2770_pad_type_0, strides = var_2770_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2755_cast_fp16)[name = string("op_2770_cast_fp16")]; + tensor var_2774 = const()[name = string("op_2774"), val = tensor([0, 2, 1])]; + int32 var_2785 = const()[name = string("op_2785"), val = int32(-1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_29_cast_fp16 = transpose(perm = var_2774, x = var_2770_cast_fp16)[name = string("transpose_163")]; + tensor var_2787_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_2787_cast_fp16")]; + bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; + tensor input_31_cast_fp16 = concat(axis = var_2785, interleave = input_31_interleave_0, values = (hidden_states_29_cast_fp16, var_2787_cast_fp16))[name = string("input_31_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_2782_to_fp16 = const()[name = string("op_2782_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2782_to_fp16, x = input_31_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_39_cast_fp16 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39_cast_fp16")]; + tensor var_2801_to_fp16 = const()[name = string("op_2801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399979008)))]; + tensor attn_output_19_cast_fp16 = mul(x = normed_39_cast_fp16, y = var_2801_to_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + int32 var_2814 = const()[name = string("op_2814"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2816_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_2816_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_2814, interleave = input_33_interleave_0, values = (hidden_states_31_cast_fp16, var_2816_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_2811_to_fp16 = const()[name = string("op_2811_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2811_to_fp16, x = input_33_cast_fp16)[name = string("normed_41_cast_fp16")]; + tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_43_cast_fp16 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43_cast_fp16")]; + tensor var_2830_to_fp16 = const()[name = string("op_2830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399981376)))]; + tensor x_29_cast_fp16 = mul(x = normed_43_cast_fp16, y = var_2830_to_fp16)[name = string("x_29_cast_fp16")]; + tensor var_2842 = const()[name = string("op_2842"), val = tensor([0, 2, 1])]; + tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; + tensor var_2843_cast_fp16 = transpose(perm = var_2842, x = x_29_cast_fp16)[name = string("transpose_162")]; + tensor input_35_cast_fp16 = expand_dims(axes = input_35_axes_0, x = var_2843_cast_fp16)[name = string("input_35_cast_fp16")]; + string x_31_pad_type_0 = const()[name = string("x_31_pad_type_0"), val = string("valid")]; + tensor x_31_strides_0 = const()[name = string("x_31_strides_0"), val = tensor([1, 1])]; + tensor x_31_pad_0 = const()[name = string("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_31_dilations_0 = const()[name = string("x_31_dilations_0"), val = tensor([1, 1])]; + int32 x_31_groups_0 = const()[name = string("x_31_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399983744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405955776))))[name = string("model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("x_31_cast_fp16")]; + string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; + tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; + tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; + int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406177024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412149056))))[name = string("model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_3_cast_fp16 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("b_3_cast_fp16")]; + string var_2868_mode_0 = const()[name = string("op_2868_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_2868_cast_fp16 = gelu(mode = var_2868_mode_0, x = x_31_cast_fp16)[name = string("op_2868_cast_fp16")]; + tensor input_37_cast_fp16 = mul(x = var_2868_cast_fp16, y = b_3_cast_fp16)[name = string("input_37_cast_fp16")]; + string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; + tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; + tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; + int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412370304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418342336))))[name = string("model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_3_cast_fp16 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_37_cast_fp16)[name = string("e_3_cast_fp16")]; + tensor var_2876_axes_0 = const()[name = string("op_2876_axes_0"), val = tensor([2])]; + tensor var_2876_cast_fp16 = squeeze(axes = var_2876_axes_0, x = e_3_cast_fp16)[name = string("op_2876_cast_fp16")]; + tensor var_2877 = const()[name = string("op_2877"), val = tensor([0, 2, 1])]; + int32 var_2888 = const()[name = string("op_2888"), val = int32(-1)]; + fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_33_cast_fp16 = transpose(perm = var_2877, x = var_2876_cast_fp16)[name = string("transpose_161")]; + tensor var_2890_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_2890_cast_fp16")]; + bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; + tensor input_39_cast_fp16 = concat(axis = var_2888, interleave = input_39_interleave_0, values = (hidden_states_33_cast_fp16, var_2890_cast_fp16))[name = string("input_39_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_2885_to_fp16 = const()[name = string("op_2885_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_2885_to_fp16, x = input_39_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; + tensor var_2904_to_fp16 = const()[name = string("op_2904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418379264)))]; + tensor hidden_states_35_cast_fp16 = mul(x = normed_47_cast_fp16, y = var_2904_to_fp16)[name = string("hidden_states_35_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + int32 var_2955 = const()[name = string("op_2955"), val = int32(-1)]; + fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2957_cast_fp16 = mul(x = hidden_states_37_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_2957_cast_fp16")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41_cast_fp16 = concat(axis = var_2955, interleave = input_41_interleave_0, values = (hidden_states_37_cast_fp16, var_2957_cast_fp16))[name = string("input_41_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_2952_to_fp16, x = input_41_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; + tensor var_2971_to_fp16 = const()[name = string("op_2971_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418381632)))]; + tensor hidden_states_39_cast_fp16 = mul(x = normed_51_cast_fp16, y = var_2971_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor var_2976 = const()[name = string("op_2976"), val = tensor([0, 2, 1])]; + tensor var_2979_axes_0 = const()[name = string("op_2979_axes_0"), val = tensor([2])]; + tensor var_2977_cast_fp16 = transpose(perm = var_2976, x = hidden_states_39_cast_fp16)[name = string("transpose_160")]; + tensor var_2979_cast_fp16 = expand_dims(axes = var_2979_axes_0, x = var_2977_cast_fp16)[name = string("op_2979_cast_fp16")]; + string var_2995_pad_type_0 = const()[name = string("op_2995_pad_type_0"), val = string("valid")]; + tensor var_2995_strides_0 = const()[name = string("op_2995_strides_0"), val = tensor([1, 1])]; + tensor var_2995_pad_0 = const()[name = string("op_2995_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2995_dilations_0 = const()[name = string("op_2995_dilations_0"), val = tensor([1, 1])]; + int32 var_2995_groups_0 = const()[name = string("op_2995_groups_0"), val = int32(1)]; + tensor var_2995 = conv(dilations = var_2995_dilations_0, groups = var_2995_groups_0, pad = var_2995_pad_0, pad_type = var_2995_pad_type_0, strides = var_2995_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2979_cast_fp16)[name = string("op_2995")]; + tensor var_3000 = const()[name = string("op_3000"), val = tensor([1, 4, 1, 256])]; + tensor var_3001 = reshape(shape = var_3000, x = var_2995)[name = string("op_3001")]; + string var_3017_pad_type_0 = const()[name = string("op_3017_pad_type_0"), val = string("valid")]; + tensor var_3017_strides_0 = const()[name = string("op_3017_strides_0"), val = tensor([1, 1])]; + tensor var_3017_pad_0 = const()[name = string("op_3017_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3017_dilations_0 = const()[name = string("op_3017_dilations_0"), val = tensor([1, 1])]; + int32 var_3017_groups_0 = const()[name = string("op_3017_groups_0"), val = int32(1)]; + tensor var_3017 = conv(dilations = var_3017_dilations_0, groups = var_3017_groups_0, pad = var_3017_pad_0, pad_type = var_3017_pad_type_0, strides = var_3017_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2979_cast_fp16)[name = string("op_3017")]; + tensor var_3022 = const()[name = string("op_3022"), val = tensor([1, 1, 1, 256])]; + tensor var_3023 = reshape(shape = var_3022, x = var_3017)[name = string("op_3023")]; + string var_3039_pad_type_0 = const()[name = string("op_3039_pad_type_0"), val = string("valid")]; + tensor var_3039_strides_0 = const()[name = string("op_3039_strides_0"), val = tensor([1, 1])]; + tensor var_3039_pad_0 = const()[name = string("op_3039_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3039_dilations_0 = const()[name = string("op_3039_dilations_0"), val = tensor([1, 1])]; + int32 var_3039_groups_0 = const()[name = string("op_3039_groups_0"), val = int32(1)]; + tensor var_3039 = conv(dilations = var_3039_dilations_0, groups = var_3039_groups_0, pad = var_3039_pad_0, pad_type = var_3039_pad_type_0, strides = var_3039_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2979_cast_fp16)[name = string("op_3039")]; + tensor var_3044 = const()[name = string("op_3044"), val = tensor([1, 1, 1, 256])]; + tensor var_3045 = reshape(shape = var_3044, x = var_3039)[name = string("op_3045")]; + int32 var_3060 = const()[name = string("op_3060"), val = int32(-1)]; + fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; + tensor var_3062 = mul(x = var_3001, y = const_80_promoted)[name = string("op_3062")]; + bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; + tensor input_45 = concat(axis = var_3060, interleave = input_45_interleave_0, values = (var_3001, var_3062))[name = string("input_45")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_3057_to_fp16 = const()[name = string("op_3057_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3057_to_fp16, x = input_45)[name = string("normed_53_cast_fp16")]; + tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; + tensor var_3076_to_fp16 = const()[name = string("op_3076_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418384000)))]; + tensor q_5_cast_fp16 = mul(x = normed_55, y = var_3076_to_fp16)[name = string("q_5_cast_fp16")]; + int32 var_3087 = const()[name = string("op_3087"), val = int32(-1)]; + fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; + tensor var_3089 = mul(x = var_3023, y = const_84_promoted)[name = string("op_3089")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47 = concat(axis = var_3087, interleave = input_47_interleave_0, values = (var_3023, var_3089))[name = string("input_47")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_3084_to_fp16 = const()[name = string("op_3084_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3084_to_fp16, x = input_47)[name = string("normed_57_cast_fp16")]; + tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; + tensor var_3103_to_fp16 = const()[name = string("op_3103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418384576)))]; + tensor k_5_cast_fp16 = mul(x = normed_59, y = var_3103_to_fp16)[name = string("k_5_cast_fp16")]; + tensor var_3105_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3105_cast_fp16")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; + fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3126_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_3126_cast_fp16")]; + int32 var_3128 = const()[name = string("op_3128"), val = int32(-1)]; + bool var_3129_interleave_0 = const()[name = string("op_3129_interleave_0"), val = bool(false)]; + tensor var_3129_cast_fp16 = concat(axis = var_3128, interleave = var_3129_interleave_0, values = (var_3126_cast_fp16, x1_9_cast_fp16))[name = string("op_3129_cast_fp16")]; + tensor var_3130_cast_fp16 = mul(x = var_3129_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3130_cast_fp16")]; + tensor query_states_9_cast_fp16 = add(x = var_3105_cast_fp16, y = var_3130_cast_fp16)[name = string("query_states_9_cast_fp16")]; + tensor var_3133_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3133_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; + fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3154_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_3154_cast_fp16")]; + int32 var_3156 = const()[name = string("op_3156"), val = int32(-1)]; + bool var_3157_interleave_0 = const()[name = string("op_3157_interleave_0"), val = bool(false)]; + tensor var_3157_cast_fp16 = concat(axis = var_3156, interleave = var_3157_interleave_0, values = (var_3154_cast_fp16, x1_11_cast_fp16))[name = string("op_3157_cast_fp16")]; + tensor var_3158_cast_fp16 = mul(x = var_3157_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3158_cast_fp16")]; + tensor key_states_9_cast_fp16 = add(x = var_3133_cast_fp16, y = var_3158_cast_fp16)[name = string("key_states_9_cast_fp16")]; + tensor key_slice_5_begin_0 = const()[name = string("key_slice_5_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor key_slice_5_end_0 = const()[name = string("key_slice_5_end_0"), val = tensor([3, 1, 512, 256])]; + tensor key_slice_5_end_mask_0 = const()[name = string("key_slice_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_5_cast_fp16 = slice_by_index(begin = key_slice_5_begin_0, end = key_slice_5_end_0, end_mask = key_slice_5_end_mask_0, x = coreml_update_state_55)[name = string("key_slice_5_cast_fp16")]; + tensor key_tail_5_begin_0 = const()[name = string("key_tail_5_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_5_end_0 = const()[name = string("key_tail_5_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_5_cast_fp16 = slice_by_index(begin = key_tail_5_begin_0, end = key_tail_5_end_0, x = key_slice_5_cast_fp16)[name = string("key_tail_5_cast_fp16")]; + int32 var_3171 = const()[name = string("op_3171"), val = int32(2)]; + bool shifted_key_5_interleave_0 = const()[name = string("shifted_key_5_interleave_0"), val = bool(false)]; + tensor shifted_key_5_cast_fp16 = concat(axis = var_3171, interleave = shifted_key_5_interleave_0, values = (key_tail_5_cast_fp16, key_states_9_cast_fp16))[name = string("shifted_key_5_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([2, 0, 0, 0])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([3, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_8, begin_mask = model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0, end = concat_9, end_mask = model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_5_stride_0, update = shifted_key_5_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_56_write_state")]; + tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_56")]; + tensor value_slice_5_begin_0 = const()[name = string("value_slice_5_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor value_slice_5_end_0 = const()[name = string("value_slice_5_end_0"), val = tensor([25, 1, 512, 256])]; + tensor value_slice_5_end_mask_0 = const()[name = string("value_slice_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_5_cast_fp16 = slice_by_index(begin = value_slice_5_begin_0, end = value_slice_5_end_0, end_mask = value_slice_5_end_mask_0, x = coreml_update_state_56)[name = string("value_slice_5_cast_fp16")]; + tensor value_tail_5_begin_0 = const()[name = string("value_tail_5_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_5_end_0 = const()[name = string("value_tail_5_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_5_cast_fp16 = slice_by_index(begin = value_tail_5_begin_0, end = value_tail_5_end_0, x = value_slice_5_cast_fp16)[name = string("value_tail_5_cast_fp16")]; + int32 var_3205 = const()[name = string("op_3205"), val = int32(2)]; + bool shifted_value_5_interleave_0 = const()[name = string("shifted_value_5_interleave_0"), val = bool(false)]; + tensor shifted_value_5_cast_fp16 = concat(axis = var_3205, interleave = shifted_value_5_interleave_0, values = (value_tail_5_cast_fp16, var_3045))[name = string("shifted_value_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([24, 0, 0, 0])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([25, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_6_stride_0, update = shifted_value_5_cast_fp16, x = coreml_update_state_56)[name = string("model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_57_write_state")]; + tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_57")]; + tensor var_3233_begin_0 = const()[name = string("op_3233_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_3233_end_0 = const()[name = string("op_3233_end_0"), val = tensor([3, 1, 512, 256])]; + tensor var_3233_end_mask_0 = const()[name = string("op_3233_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3233_cast_fp16 = slice_by_index(begin = var_3233_begin_0, end = var_3233_end_0, end_mask = var_3233_end_mask_0, x = coreml_update_state_57)[name = string("op_3233_cast_fp16")]; + tensor var_3240_begin_0 = const()[name = string("op_3240_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor var_3240_end_0 = const()[name = string("op_3240_end_0"), val = tensor([25, 1, 512, 256])]; + tensor var_3240_end_mask_0 = const()[name = string("op_3240_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3240_cast_fp16 = slice_by_index(begin = var_3240_begin_0, end = var_3240_end_0, end_mask = var_3240_end_mask_0, x = coreml_update_state_57)[name = string("op_3240_cast_fp16")]; + tensor var_3277 = const()[name = string("op_3277"), val = tensor([1, 4, 1, 1])]; + tensor x_37_cast_fp16 = tile(reps = var_3277, x = var_3233_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_3297 = const()[name = string("op_3297"), val = tensor([1, 4, 1, 1])]; + tensor x_43_cast_fp16 = tile(reps = var_3297, x = var_3240_cast_fp16)[name = string("x_43_cast_fp16")]; + bool var_3324_transpose_x_1 = const()[name = string("op_3324_transpose_x_1"), val = bool(false)]; + bool var_3324_transpose_y_1 = const()[name = string("op_3324_transpose_y_1"), val = bool(true)]; + tensor var_3324 = matmul(transpose_x = var_3324_transpose_x_1, transpose_y = var_3324_transpose_y_1, x = query_states_9_cast_fp16, y = x_37_cast_fp16)[name = string("op_3324")]; + fp16 var_3325_to_fp16 = const()[name = string("op_3325_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_3324, y = var_3325_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = var_2105)[name = string("attn_weights_15_cast_fp16")]; + int32 var_3360 = const()[name = string("op_3360"), val = int32(-1)]; + tensor attn_weights_17_cast_fp16 = softmax(axis = var_3360, x = attn_weights_15_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; + bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = attn_weights_17_cast_fp16, y = x_43_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_3371_perm_0 = const()[name = string("op_3371_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3375 = const()[name = string("op_3375"), val = tensor([1, 1, 1024])]; + tensor var_3371_cast_fp16 = transpose(perm = var_3371_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_159")]; + tensor attn_output_25_cast_fp16 = reshape(shape = var_3375, x = var_3371_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3380 = const()[name = string("op_3380"), val = tensor([0, 2, 1])]; + string var_3396_pad_type_0 = const()[name = string("op_3396_pad_type_0"), val = string("valid")]; + int32 var_3396_groups_0 = const()[name = string("op_3396_groups_0"), val = int32(1)]; + tensor var_3396_strides_0 = const()[name = string("op_3396_strides_0"), val = tensor([1])]; + tensor var_3396_pad_0 = const()[name = string("op_3396_pad_0"), val = tensor([0, 0])]; + tensor var_3396_dilations_0 = const()[name = string("op_3396_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418385152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419269952))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3381_cast_fp16 = transpose(perm = var_3380, x = attn_output_25_cast_fp16)[name = string("transpose_158")]; + tensor var_3396_cast_fp16 = conv(dilations = var_3396_dilations_0, groups = var_3396_groups_0, pad = var_3396_pad_0, pad_type = var_3396_pad_type_0, strides = var_3396_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3381_cast_fp16)[name = string("op_3396_cast_fp16")]; + tensor var_3400 = const()[name = string("op_3400"), val = tensor([0, 2, 1])]; + int32 var_3411 = const()[name = string("op_3411"), val = int32(-1)]; + fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_45_cast_fp16 = transpose(perm = var_3400, x = var_3396_cast_fp16)[name = string("transpose_157")]; + tensor var_3413_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_3413_cast_fp16")]; + bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; + tensor input_51_cast_fp16 = concat(axis = var_3411, interleave = input_51_interleave_0, values = (hidden_states_45_cast_fp16, var_3413_cast_fp16))[name = string("input_51_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_3408_to_fp16 = const()[name = string("op_3408_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3408_to_fp16, x = input_51_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; + tensor var_3427_to_fp16 = const()[name = string("op_3427_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419306880)))]; + tensor attn_output_29_cast_fp16 = mul(x = normed_63_cast_fp16, y = var_3427_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; + int32 var_3440 = const()[name = string("op_3440"), val = int32(-1)]; + fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3442_cast_fp16 = mul(x = hidden_states_47_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_3442_cast_fp16")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53_cast_fp16 = concat(axis = var_3440, interleave = input_53_interleave_0, values = (hidden_states_47_cast_fp16, var_3442_cast_fp16))[name = string("input_53_cast_fp16")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_3437_to_fp16 = const()[name = string("op_3437_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3437_to_fp16, x = input_53_cast_fp16)[name = string("normed_65_cast_fp16")]; + tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; + tensor var_3456_to_fp16 = const()[name = string("op_3456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419309248)))]; + tensor x_45_cast_fp16 = mul(x = normed_67_cast_fp16, y = var_3456_to_fp16)[name = string("x_45_cast_fp16")]; + tensor var_3468 = const()[name = string("op_3468"), val = tensor([0, 2, 1])]; + tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; + tensor var_3469_cast_fp16 = transpose(perm = var_3468, x = x_45_cast_fp16)[name = string("transpose_156")]; + tensor input_55_cast_fp16 = expand_dims(axes = input_55_axes_0, x = var_3469_cast_fp16)[name = string("input_55_cast_fp16")]; + string x_47_pad_type_0 = const()[name = string("x_47_pad_type_0"), val = string("valid")]; + tensor x_47_strides_0 = const()[name = string("x_47_strides_0"), val = tensor([1, 1])]; + tensor x_47_pad_0 = const()[name = string("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_47_dilations_0 = const()[name = string("x_47_dilations_0"), val = tensor([1, 1])]; + int32 x_47_groups_0 = const()[name = string("x_47_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419311616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425283648))))[name = string("model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("x_47_cast_fp16")]; + string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; + tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; + tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; + int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425504896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431476928))))[name = string("model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_5_cast_fp16 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("b_5_cast_fp16")]; + string var_3494_mode_0 = const()[name = string("op_3494_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3494_cast_fp16 = gelu(mode = var_3494_mode_0, x = x_47_cast_fp16)[name = string("op_3494_cast_fp16")]; + tensor input_57_cast_fp16 = mul(x = var_3494_cast_fp16, y = b_5_cast_fp16)[name = string("input_57_cast_fp16")]; + string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; + tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; + tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; + int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431698176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437670208))))[name = string("model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_5_cast_fp16 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("e_5_cast_fp16")]; + tensor var_3502_axes_0 = const()[name = string("op_3502_axes_0"), val = tensor([2])]; + tensor var_3502_cast_fp16 = squeeze(axes = var_3502_axes_0, x = e_5_cast_fp16)[name = string("op_3502_cast_fp16")]; + tensor var_3503 = const()[name = string("op_3503"), val = tensor([0, 2, 1])]; + int32 var_3514 = const()[name = string("op_3514"), val = int32(-1)]; + fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_49_cast_fp16 = transpose(perm = var_3503, x = var_3502_cast_fp16)[name = string("transpose_155")]; + tensor var_3516_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_110_promoted_to_fp16)[name = string("op_3516_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_3514, interleave = input_59_interleave_0, values = (hidden_states_49_cast_fp16, var_3516_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_3511_to_fp16 = const()[name = string("op_3511_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3511_to_fp16, x = input_59_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_71_cast_fp16 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71_cast_fp16")]; + tensor var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437707136)))]; + tensor hidden_states_51_cast_fp16 = mul(x = normed_71_cast_fp16, y = var_3530_to_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + int32 var_3581 = const()[name = string("op_3581"), val = int32(-1)]; + fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3583_cast_fp16 = mul(x = hidden_states_53_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3583_cast_fp16")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61_cast_fp16 = concat(axis = var_3581, interleave = input_61_interleave_0, values = (hidden_states_53_cast_fp16, var_3583_cast_fp16))[name = string("input_61_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_3578_to_fp16 = const()[name = string("op_3578_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3578_to_fp16, x = input_61_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_75_cast_fp16 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75_cast_fp16")]; + tensor var_3597_to_fp16 = const()[name = string("op_3597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437709504)))]; + tensor hidden_states_55_cast_fp16 = mul(x = normed_75_cast_fp16, y = var_3597_to_fp16)[name = string("hidden_states_55_cast_fp16")]; + tensor var_3602 = const()[name = string("op_3602"), val = tensor([0, 2, 1])]; + tensor var_3605_axes_0 = const()[name = string("op_3605_axes_0"), val = tensor([2])]; + tensor var_3603_cast_fp16 = transpose(perm = var_3602, x = hidden_states_55_cast_fp16)[name = string("transpose_154")]; + tensor var_3605_cast_fp16 = expand_dims(axes = var_3605_axes_0, x = var_3603_cast_fp16)[name = string("op_3605_cast_fp16")]; + string var_3621_pad_type_0 = const()[name = string("op_3621_pad_type_0"), val = string("valid")]; + tensor var_3621_strides_0 = const()[name = string("op_3621_strides_0"), val = tensor([1, 1])]; + tensor var_3621_pad_0 = const()[name = string("op_3621_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3621_dilations_0 = const()[name = string("op_3621_dilations_0"), val = tensor([1, 1])]; + int32 var_3621_groups_0 = const()[name = string("op_3621_groups_0"), val = int32(1)]; + tensor var_3621 = conv(dilations = var_3621_dilations_0, groups = var_3621_groups_0, pad = var_3621_pad_0, pad_type = var_3621_pad_type_0, strides = var_3621_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3605_cast_fp16)[name = string("op_3621")]; + tensor var_3626 = const()[name = string("op_3626"), val = tensor([1, 4, 1, 256])]; + tensor var_3627 = reshape(shape = var_3626, x = var_3621)[name = string("op_3627")]; + string var_3643_pad_type_0 = const()[name = string("op_3643_pad_type_0"), val = string("valid")]; + tensor var_3643_strides_0 = const()[name = string("op_3643_strides_0"), val = tensor([1, 1])]; + tensor var_3643_pad_0 = const()[name = string("op_3643_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3643_dilations_0 = const()[name = string("op_3643_dilations_0"), val = tensor([1, 1])]; + int32 var_3643_groups_0 = const()[name = string("op_3643_groups_0"), val = int32(1)]; + tensor var_3643 = conv(dilations = var_3643_dilations_0, groups = var_3643_groups_0, pad = var_3643_pad_0, pad_type = var_3643_pad_type_0, strides = var_3643_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3605_cast_fp16)[name = string("op_3643")]; + tensor var_3648 = const()[name = string("op_3648"), val = tensor([1, 1, 1, 256])]; + tensor var_3649 = reshape(shape = var_3648, x = var_3643)[name = string("op_3649")]; + string var_3665_pad_type_0 = const()[name = string("op_3665_pad_type_0"), val = string("valid")]; + tensor var_3665_strides_0 = const()[name = string("op_3665_strides_0"), val = tensor([1, 1])]; + tensor var_3665_pad_0 = const()[name = string("op_3665_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3665_dilations_0 = const()[name = string("op_3665_dilations_0"), val = tensor([1, 1])]; + int32 var_3665_groups_0 = const()[name = string("op_3665_groups_0"), val = int32(1)]; + tensor var_3665 = conv(dilations = var_3665_dilations_0, groups = var_3665_groups_0, pad = var_3665_pad_0, pad_type = var_3665_pad_type_0, strides = var_3665_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3605_cast_fp16)[name = string("op_3665")]; + tensor var_3670 = const()[name = string("op_3670"), val = tensor([1, 1, 1, 256])]; + tensor var_3671 = reshape(shape = var_3670, x = var_3665)[name = string("op_3671")]; + int32 var_3686 = const()[name = string("op_3686"), val = int32(-1)]; + fp16 const_118_promoted = const()[name = string("const_118_promoted"), val = fp16(-0x1p+0)]; + tensor var_3688 = mul(x = var_3627, y = const_118_promoted)[name = string("op_3688")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_3686, interleave = input_65_interleave_0, values = (var_3627, var_3688))[name = string("input_65")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_3683_to_fp16 = const()[name = string("op_3683_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_3683_to_fp16, x = input_65)[name = string("normed_77_cast_fp16")]; + tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_79 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79")]; + tensor var_3702_to_fp16 = const()[name = string("op_3702_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437711872)))]; + tensor q_7_cast_fp16 = mul(x = normed_79, y = var_3702_to_fp16)[name = string("q_7_cast_fp16")]; + int32 var_3713 = const()[name = string("op_3713"), val = int32(-1)]; + fp16 const_122_promoted = const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; + tensor var_3715 = mul(x = var_3649, y = const_122_promoted)[name = string("op_3715")]; + bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; + tensor input_67 = concat(axis = var_3713, interleave = input_67_interleave_0, values = (var_3649, var_3715))[name = string("input_67")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_3710_to_fp16 = const()[name = string("op_3710_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_3710_to_fp16, x = input_67)[name = string("normed_81_cast_fp16")]; + tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_83 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83")]; + tensor var_3729_to_fp16 = const()[name = string("op_3729_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437712448)))]; + tensor k_7_cast_fp16 = mul(x = normed_83, y = var_3729_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_3731_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3731_cast_fp16")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; + fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3752_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_3752_cast_fp16")]; + int32 var_3754 = const()[name = string("op_3754"), val = int32(-1)]; + bool var_3755_interleave_0 = const()[name = string("op_3755_interleave_0"), val = bool(false)]; + tensor var_3755_cast_fp16 = concat(axis = var_3754, interleave = var_3755_interleave_0, values = (var_3752_cast_fp16, x1_13_cast_fp16))[name = string("op_3755_cast_fp16")]; + tensor var_3756_cast_fp16 = mul(x = var_3755_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3756_cast_fp16")]; + tensor query_states_13_cast_fp16 = add(x = var_3731_cast_fp16, y = var_3756_cast_fp16)[name = string("query_states_13_cast_fp16")]; + tensor var_3759_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3759_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; + fp16 const_131_promoted_to_fp16 = const()[name = string("const_131_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3780_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_131_promoted_to_fp16)[name = string("op_3780_cast_fp16")]; + int32 var_3782 = const()[name = string("op_3782"), val = int32(-1)]; + bool var_3783_interleave_0 = const()[name = string("op_3783_interleave_0"), val = bool(false)]; + tensor var_3783_cast_fp16 = concat(axis = var_3782, interleave = var_3783_interleave_0, values = (var_3780_cast_fp16, x1_15_cast_fp16))[name = string("op_3783_cast_fp16")]; + tensor var_3784_cast_fp16 = mul(x = var_3783_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3784_cast_fp16")]; + tensor key_states_13_cast_fp16 = add(x = var_3759_cast_fp16, y = var_3784_cast_fp16)[name = string("key_states_13_cast_fp16")]; + tensor key_slice_7_begin_0 = const()[name = string("key_slice_7_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor key_slice_7_end_0 = const()[name = string("key_slice_7_end_0"), val = tensor([4, 1, 512, 256])]; + tensor key_slice_7_end_mask_0 = const()[name = string("key_slice_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_7_cast_fp16 = slice_by_index(begin = key_slice_7_begin_0, end = key_slice_7_end_0, end_mask = key_slice_7_end_mask_0, x = coreml_update_state_57)[name = string("key_slice_7_cast_fp16")]; + tensor key_tail_7_begin_0 = const()[name = string("key_tail_7_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_7_end_0 = const()[name = string("key_tail_7_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_7_cast_fp16 = slice_by_index(begin = key_tail_7_begin_0, end = key_tail_7_end_0, x = key_slice_7_cast_fp16)[name = string("key_tail_7_cast_fp16")]; + int32 var_3797 = const()[name = string("op_3797"), val = int32(2)]; + bool shifted_key_7_interleave_0 = const()[name = string("shifted_key_7_interleave_0"), val = bool(false)]; + tensor shifted_key_7_cast_fp16 = concat(axis = var_3797, interleave = shifted_key_7_interleave_0, values = (key_tail_7_cast_fp16, key_states_13_cast_fp16))[name = string("shifted_key_7_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([3, 0, 0, 0])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([4, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_12, begin_mask = model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0, end = concat_13, end_mask = model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_7_stride_0, update = shifted_key_7_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_58_write_state")]; + tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_58")]; + tensor value_slice_7_begin_0 = const()[name = string("value_slice_7_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor value_slice_7_end_0 = const()[name = string("value_slice_7_end_0"), val = tensor([26, 1, 512, 256])]; + tensor value_slice_7_end_mask_0 = const()[name = string("value_slice_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_7_cast_fp16 = slice_by_index(begin = value_slice_7_begin_0, end = value_slice_7_end_0, end_mask = value_slice_7_end_mask_0, x = coreml_update_state_58)[name = string("value_slice_7_cast_fp16")]; + tensor value_tail_7_begin_0 = const()[name = string("value_tail_7_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_7_end_0 = const()[name = string("value_tail_7_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_7_cast_fp16 = slice_by_index(begin = value_tail_7_begin_0, end = value_tail_7_end_0, x = value_slice_7_cast_fp16)[name = string("value_tail_7_cast_fp16")]; + int32 var_3831 = const()[name = string("op_3831"), val = int32(2)]; + bool shifted_value_7_interleave_0 = const()[name = string("shifted_value_7_interleave_0"), val = bool(false)]; + tensor shifted_value_7_cast_fp16 = concat(axis = var_3831, interleave = shifted_value_7_interleave_0, values = (value_tail_7_cast_fp16, var_3671))[name = string("shifted_value_7_cast_fp16")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([25, 0, 0, 0])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([26, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_8_stride_0, update = shifted_value_7_cast_fp16, x = coreml_update_state_58)[name = string("model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_59_write_state")]; + tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_59")]; + tensor var_3859_begin_0 = const()[name = string("op_3859_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_3859_end_0 = const()[name = string("op_3859_end_0"), val = tensor([4, 1, 512, 256])]; + tensor var_3859_end_mask_0 = const()[name = string("op_3859_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3859_cast_fp16 = slice_by_index(begin = var_3859_begin_0, end = var_3859_end_0, end_mask = var_3859_end_mask_0, x = coreml_update_state_59)[name = string("op_3859_cast_fp16")]; + tensor var_3866_begin_0 = const()[name = string("op_3866_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor var_3866_end_0 = const()[name = string("op_3866_end_0"), val = tensor([26, 1, 512, 256])]; + tensor var_3866_end_mask_0 = const()[name = string("op_3866_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3866_cast_fp16 = slice_by_index(begin = var_3866_begin_0, end = var_3866_end_0, end_mask = var_3866_end_mask_0, x = coreml_update_state_59)[name = string("op_3866_cast_fp16")]; + tensor var_3903 = const()[name = string("op_3903"), val = tensor([1, 4, 1, 1])]; + tensor x_53_cast_fp16 = tile(reps = var_3903, x = var_3859_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_3923 = const()[name = string("op_3923"), val = tensor([1, 4, 1, 1])]; + tensor x_59_cast_fp16 = tile(reps = var_3923, x = var_3866_cast_fp16)[name = string("x_59_cast_fp16")]; + bool var_3950_transpose_x_1 = const()[name = string("op_3950_transpose_x_1"), val = bool(false)]; + bool var_3950_transpose_y_1 = const()[name = string("op_3950_transpose_y_1"), val = bool(true)]; + tensor var_3950 = matmul(transpose_x = var_3950_transpose_x_1, transpose_y = var_3950_transpose_y_1, x = query_states_13_cast_fp16, y = x_53_cast_fp16)[name = string("op_3950")]; + fp16 var_3951_to_fp16 = const()[name = string("op_3951_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_19_cast_fp16 = mul(x = var_3950, y = var_3951_to_fp16)[name = string("attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = var_2105)[name = string("attn_weights_21_cast_fp16")]; + int32 var_3986 = const()[name = string("op_3986"), val = int32(-1)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_3986, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = x_59_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_3997_perm_0 = const()[name = string("op_3997_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4001 = const()[name = string("op_4001"), val = tensor([1, 1, 1024])]; + tensor var_3997_cast_fp16 = transpose(perm = var_3997_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_153")]; + tensor attn_output_35_cast_fp16 = reshape(shape = var_4001, x = var_3997_cast_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor var_4006 = const()[name = string("op_4006"), val = tensor([0, 2, 1])]; + string var_4022_pad_type_0 = const()[name = string("op_4022_pad_type_0"), val = string("valid")]; + int32 var_4022_groups_0 = const()[name = string("op_4022_groups_0"), val = int32(1)]; + tensor var_4022_strides_0 = const()[name = string("op_4022_strides_0"), val = tensor([1])]; + tensor var_4022_pad_0 = const()[name = string("op_4022_pad_0"), val = tensor([0, 0])]; + tensor var_4022_dilations_0 = const()[name = string("op_4022_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437713024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438597824))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4007_cast_fp16 = transpose(perm = var_4006, x = attn_output_35_cast_fp16)[name = string("transpose_152")]; + tensor var_4022_cast_fp16 = conv(dilations = var_4022_dilations_0, groups = var_4022_groups_0, pad = var_4022_pad_0, pad_type = var_4022_pad_type_0, strides = var_4022_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_4007_cast_fp16)[name = string("op_4022_cast_fp16")]; + tensor var_4026 = const()[name = string("op_4026"), val = tensor([0, 2, 1])]; + int32 var_4037 = const()[name = string("op_4037"), val = int32(-1)]; + fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_61_cast_fp16 = transpose(perm = var_4026, x = var_4022_cast_fp16)[name = string("transpose_151")]; + tensor var_4039_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_4039_cast_fp16")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_4037, interleave = input_71_interleave_0, values = (hidden_states_61_cast_fp16, var_4039_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_4034_to_fp16 = const()[name = string("op_4034_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_4034_to_fp16, x = input_71_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_87_cast_fp16 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87_cast_fp16")]; + tensor var_4053_to_fp16 = const()[name = string("op_4053_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438634752)))]; + tensor attn_output_39_cast_fp16 = mul(x = normed_87_cast_fp16, y = var_4053_to_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; + int32 var_4066 = const()[name = string("op_4066"), val = int32(-1)]; + fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4068_cast_fp16 = mul(x = hidden_states_63_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4068_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_4066, interleave = input_73_interleave_0, values = (hidden_states_63_cast_fp16, var_4068_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_4063_to_fp16 = const()[name = string("op_4063_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4063_to_fp16, x = input_73_cast_fp16)[name = string("normed_89_cast_fp16")]; + tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_91_cast_fp16 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91_cast_fp16")]; + tensor var_4082_to_fp16 = const()[name = string("op_4082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438637120)))]; + tensor x_61_cast_fp16 = mul(x = normed_91_cast_fp16, y = var_4082_to_fp16)[name = string("x_61_cast_fp16")]; + tensor var_4094 = const()[name = string("op_4094"), val = tensor([0, 2, 1])]; + tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; + tensor var_4095_cast_fp16 = transpose(perm = var_4094, x = x_61_cast_fp16)[name = string("transpose_150")]; + tensor input_75_cast_fp16 = expand_dims(axes = input_75_axes_0, x = var_4095_cast_fp16)[name = string("input_75_cast_fp16")]; + string x_63_pad_type_0 = const()[name = string("x_63_pad_type_0"), val = string("valid")]; + tensor x_63_strides_0 = const()[name = string("x_63_strides_0"), val = tensor([1, 1])]; + tensor x_63_pad_0 = const()[name = string("x_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_63_dilations_0 = const()[name = string("x_63_dilations_0"), val = tensor([1, 1])]; + int32 x_63_groups_0 = const()[name = string("x_63_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438639488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444611520))))[name = string("model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_63_cast_fp16 = conv(dilations = x_63_dilations_0, groups = x_63_groups_0, pad = x_63_pad_0, pad_type = x_63_pad_type_0, strides = x_63_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("x_63_cast_fp16")]; + string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; + tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; + tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; + int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444832768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450804800))))[name = string("model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_7_cast_fp16 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("b_7_cast_fp16")]; + string var_4120_mode_0 = const()[name = string("op_4120_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4120_cast_fp16 = gelu(mode = var_4120_mode_0, x = x_63_cast_fp16)[name = string("op_4120_cast_fp16")]; + tensor input_77_cast_fp16 = mul(x = var_4120_cast_fp16, y = b_7_cast_fp16)[name = string("input_77_cast_fp16")]; + string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; + tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; + tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; + int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451026048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456998080))))[name = string("model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_7_cast_fp16 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_77_cast_fp16)[name = string("e_7_cast_fp16")]; + tensor var_4128_axes_0 = const()[name = string("op_4128_axes_0"), val = tensor([2])]; + tensor var_4128_cast_fp16 = squeeze(axes = var_4128_axes_0, x = e_7_cast_fp16)[name = string("op_4128_cast_fp16")]; + tensor var_4129 = const()[name = string("op_4129"), val = tensor([0, 2, 1])]; + int32 var_4140 = const()[name = string("op_4140"), val = int32(-1)]; + fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_65_cast_fp16 = transpose(perm = var_4129, x = var_4128_cast_fp16)[name = string("transpose_149")]; + tensor var_4142_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_148_promoted_to_fp16)[name = string("op_4142_cast_fp16")]; + bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; + tensor input_79_cast_fp16 = concat(axis = var_4140, interleave = input_79_interleave_0, values = (hidden_states_65_cast_fp16, var_4142_cast_fp16))[name = string("input_79_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_4137_to_fp16 = const()[name = string("op_4137_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4137_to_fp16, x = input_79_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; + tensor var_4156_to_fp16 = const()[name = string("op_4156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457035008)))]; + tensor hidden_states_67_cast_fp16 = mul(x = normed_95_cast_fp16, y = var_4156_to_fp16)[name = string("hidden_states_67_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_63_cast_fp16, y = hidden_states_67_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + int32 var_4207 = const()[name = string("op_4207"), val = int32(-1)]; + fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4209_cast_fp16 = mul(x = hidden_states_69_cast_fp16, y = const_152_promoted_to_fp16)[name = string("op_4209_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_4207, interleave = input_81_interleave_0, values = (hidden_states_69_cast_fp16, var_4209_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_4204_to_fp16 = const()[name = string("op_4204_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4204_to_fp16, x = input_81_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; + tensor var_4223_to_fp16 = const()[name = string("op_4223_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457037376)))]; + tensor hidden_states_71_cast_fp16 = mul(x = normed_99_cast_fp16, y = var_4223_to_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor var_4228 = const()[name = string("op_4228"), val = tensor([0, 2, 1])]; + tensor var_4231_axes_0 = const()[name = string("op_4231_axes_0"), val = tensor([2])]; + tensor var_4229_cast_fp16 = transpose(perm = var_4228, x = hidden_states_71_cast_fp16)[name = string("transpose_148")]; + tensor var_4231_cast_fp16 = expand_dims(axes = var_4231_axes_0, x = var_4229_cast_fp16)[name = string("op_4231_cast_fp16")]; + string var_4247_pad_type_0 = const()[name = string("op_4247_pad_type_0"), val = string("valid")]; + tensor var_4247_strides_0 = const()[name = string("op_4247_strides_0"), val = tensor([1, 1])]; + tensor var_4247_pad_0 = const()[name = string("op_4247_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4247_dilations_0 = const()[name = string("op_4247_dilations_0"), val = tensor([1, 1])]; + int32 var_4247_groups_0 = const()[name = string("op_4247_groups_0"), val = int32(1)]; + tensor var_4247 = conv(dilations = var_4247_dilations_0, groups = var_4247_groups_0, pad = var_4247_pad_0, pad_type = var_4247_pad_type_0, strides = var_4247_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_4231_cast_fp16)[name = string("op_4247")]; + tensor var_4252 = const()[name = string("op_4252"), val = tensor([1, 4, 1, 256])]; + tensor var_4253 = reshape(shape = var_4252, x = var_4247)[name = string("op_4253")]; + string var_4269_pad_type_0 = const()[name = string("op_4269_pad_type_0"), val = string("valid")]; + tensor var_4269_strides_0 = const()[name = string("op_4269_strides_0"), val = tensor([1, 1])]; + tensor var_4269_pad_0 = const()[name = string("op_4269_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4269_dilations_0 = const()[name = string("op_4269_dilations_0"), val = tensor([1, 1])]; + int32 var_4269_groups_0 = const()[name = string("op_4269_groups_0"), val = int32(1)]; + tensor var_4269 = conv(dilations = var_4269_dilations_0, groups = var_4269_groups_0, pad = var_4269_pad_0, pad_type = var_4269_pad_type_0, strides = var_4269_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_4231_cast_fp16)[name = string("op_4269")]; + tensor var_4274 = const()[name = string("op_4274"), val = tensor([1, 1, 1, 256])]; + tensor var_4275 = reshape(shape = var_4274, x = var_4269)[name = string("op_4275")]; + string var_4291_pad_type_0 = const()[name = string("op_4291_pad_type_0"), val = string("valid")]; + tensor var_4291_strides_0 = const()[name = string("op_4291_strides_0"), val = tensor([1, 1])]; + tensor var_4291_pad_0 = const()[name = string("op_4291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4291_dilations_0 = const()[name = string("op_4291_dilations_0"), val = tensor([1, 1])]; + int32 var_4291_groups_0 = const()[name = string("op_4291_groups_0"), val = int32(1)]; + tensor var_4291 = conv(dilations = var_4291_dilations_0, groups = var_4291_groups_0, pad = var_4291_pad_0, pad_type = var_4291_pad_type_0, strides = var_4291_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_4231_cast_fp16)[name = string("op_4291")]; + tensor var_4296 = const()[name = string("op_4296"), val = tensor([1, 1, 1, 256])]; + tensor var_4297 = reshape(shape = var_4296, x = var_4291)[name = string("op_4297")]; + int32 var_4312 = const()[name = string("op_4312"), val = int32(-1)]; + fp16 const_156_promoted = const()[name = string("const_156_promoted"), val = fp16(-0x1p+0)]; + tensor var_4314 = mul(x = var_4253, y = const_156_promoted)[name = string("op_4314")]; + bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; + tensor input_85 = concat(axis = var_4312, interleave = input_85_interleave_0, values = (var_4253, var_4314))[name = string("input_85")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_4309_to_fp16 = const()[name = string("op_4309_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4309_to_fp16, x = input_85)[name = string("normed_101_cast_fp16")]; + tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; + tensor var_4328_to_fp16 = const()[name = string("op_4328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457039744)))]; + tensor q_9_cast_fp16 = mul(x = normed_103, y = var_4328_to_fp16)[name = string("q_9_cast_fp16")]; + int32 var_4339 = const()[name = string("op_4339"), val = int32(-1)]; + fp16 const_160_promoted = const()[name = string("const_160_promoted"), val = fp16(-0x1p+0)]; + tensor var_4341 = mul(x = var_4275, y = const_160_promoted)[name = string("op_4341")]; + bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; + tensor input_87 = concat(axis = var_4339, interleave = input_87_interleave_0, values = (var_4275, var_4341))[name = string("input_87")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_4336_to_fp16 = const()[name = string("op_4336_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4336_to_fp16, x = input_87)[name = string("normed_105_cast_fp16")]; + tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; + tensor var_4355_to_fp16 = const()[name = string("op_4355_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457040320)))]; + tensor k_9_cast_fp16 = mul(x = normed_107, y = var_4355_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_4357_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4357_cast_fp16")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; + fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4378_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_166_promoted_to_fp16)[name = string("op_4378_cast_fp16")]; + int32 var_4380 = const()[name = string("op_4380"), val = int32(-1)]; + bool var_4381_interleave_0 = const()[name = string("op_4381_interleave_0"), val = bool(false)]; + tensor var_4381_cast_fp16 = concat(axis = var_4380, interleave = var_4381_interleave_0, values = (var_4378_cast_fp16, x1_17_cast_fp16))[name = string("op_4381_cast_fp16")]; + tensor var_4382_cast_fp16 = mul(x = var_4381_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4382_cast_fp16")]; + tensor query_states_17_cast_fp16 = add(x = var_4357_cast_fp16, y = var_4382_cast_fp16)[name = string("query_states_17_cast_fp16")]; + tensor var_4385_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4385_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; + fp16 const_169_promoted_to_fp16 = const()[name = string("const_169_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4406_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_169_promoted_to_fp16)[name = string("op_4406_cast_fp16")]; + int32 var_4408 = const()[name = string("op_4408"), val = int32(-1)]; + bool var_4409_interleave_0 = const()[name = string("op_4409_interleave_0"), val = bool(false)]; + tensor var_4409_cast_fp16 = concat(axis = var_4408, interleave = var_4409_interleave_0, values = (var_4406_cast_fp16, x1_19_cast_fp16))[name = string("op_4409_cast_fp16")]; + tensor var_4410_cast_fp16 = mul(x = var_4409_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4410_cast_fp16")]; + tensor key_states_17_cast_fp16 = add(x = var_4385_cast_fp16, y = var_4410_cast_fp16)[name = string("key_states_17_cast_fp16")]; + tensor key_slice_9_begin_0 = const()[name = string("key_slice_9_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor key_slice_9_end_0 = const()[name = string("key_slice_9_end_0"), val = tensor([5, 1, 512, 256])]; + tensor key_slice_9_end_mask_0 = const()[name = string("key_slice_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_9_cast_fp16 = slice_by_index(begin = key_slice_9_begin_0, end = key_slice_9_end_0, end_mask = key_slice_9_end_mask_0, x = coreml_update_state_59)[name = string("key_slice_9_cast_fp16")]; + tensor key_tail_9_begin_0 = const()[name = string("key_tail_9_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_9_end_0 = const()[name = string("key_tail_9_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_9_cast_fp16 = slice_by_index(begin = key_tail_9_begin_0, end = key_tail_9_end_0, x = key_slice_9_cast_fp16)[name = string("key_tail_9_cast_fp16")]; + int32 var_4423 = const()[name = string("op_4423"), val = int32(2)]; + bool shifted_key_9_interleave_0 = const()[name = string("shifted_key_9_interleave_0"), val = bool(false)]; + tensor shifted_key_9_cast_fp16 = concat(axis = var_4423, interleave = shifted_key_9_interleave_0, values = (key_tail_9_cast_fp16, key_states_17_cast_fp16))[name = string("shifted_key_9_cast_fp16")]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 0, 0, 0])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([5, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_16, begin_mask = model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0, end = concat_17, end_mask = model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_9_stride_0, update = shifted_key_9_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_60_write_state")]; + tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_60")]; + tensor value_slice_9_begin_0 = const()[name = string("value_slice_9_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor value_slice_9_end_0 = const()[name = string("value_slice_9_end_0"), val = tensor([27, 1, 512, 256])]; + tensor value_slice_9_end_mask_0 = const()[name = string("value_slice_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_9_cast_fp16 = slice_by_index(begin = value_slice_9_begin_0, end = value_slice_9_end_0, end_mask = value_slice_9_end_mask_0, x = coreml_update_state_60)[name = string("value_slice_9_cast_fp16")]; + tensor value_tail_9_begin_0 = const()[name = string("value_tail_9_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_9_end_0 = const()[name = string("value_tail_9_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_9_cast_fp16 = slice_by_index(begin = value_tail_9_begin_0, end = value_tail_9_end_0, x = value_slice_9_cast_fp16)[name = string("value_tail_9_cast_fp16")]; + int32 var_4457 = const()[name = string("op_4457"), val = int32(2)]; + bool shifted_value_9_interleave_0 = const()[name = string("shifted_value_9_interleave_0"), val = bool(false)]; + tensor shifted_value_9_cast_fp16 = concat(axis = var_4457, interleave = shifted_value_9_interleave_0, values = (value_tail_9_cast_fp16, var_4297))[name = string("shifted_value_9_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([26, 0, 0, 0])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([27, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_10_stride_0, update = shifted_value_9_cast_fp16, x = coreml_update_state_60)[name = string("model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_61_write_state")]; + tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_61")]; + tensor var_4485_begin_0 = const()[name = string("op_4485_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_4485_end_0 = const()[name = string("op_4485_end_0"), val = tensor([5, 1, 512, 256])]; + tensor var_4485_end_mask_0 = const()[name = string("op_4485_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4485_cast_fp16 = slice_by_index(begin = var_4485_begin_0, end = var_4485_end_0, end_mask = var_4485_end_mask_0, x = coreml_update_state_61)[name = string("op_4485_cast_fp16")]; + tensor var_4492_begin_0 = const()[name = string("op_4492_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor var_4492_end_0 = const()[name = string("op_4492_end_0"), val = tensor([27, 1, 512, 256])]; + tensor var_4492_end_mask_0 = const()[name = string("op_4492_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4492_cast_fp16 = slice_by_index(begin = var_4492_begin_0, end = var_4492_end_0, end_mask = var_4492_end_mask_0, x = coreml_update_state_61)[name = string("op_4492_cast_fp16")]; + tensor var_4529 = const()[name = string("op_4529"), val = tensor([1, 4, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_4529, x = var_4485_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_4549 = const()[name = string("op_4549"), val = tensor([1, 4, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_4549, x = var_4492_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_4576_transpose_x_1 = const()[name = string("op_4576_transpose_x_1"), val = bool(false)]; + bool var_4576_transpose_y_1 = const()[name = string("op_4576_transpose_y_1"), val = bool(true)]; + tensor var_4576 = matmul(transpose_x = var_4576_transpose_x_1, transpose_y = var_4576_transpose_y_1, x = query_states_17_cast_fp16, y = x_69_cast_fp16)[name = string("op_4576")]; + fp16 var_4577_to_fp16 = const()[name = string("op_4577_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_4576, y = var_4577_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = var_2105)[name = string("attn_weights_27_cast_fp16")]; + int32 var_4612 = const()[name = string("op_4612"), val = int32(-1)]; + tensor attn_weights_29_cast_fp16 = softmax(axis = var_4612, x = attn_weights_27_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; + bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; + tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = attn_weights_29_cast_fp16, y = x_75_cast_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor var_4623_perm_0 = const()[name = string("op_4623_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4627 = const()[name = string("op_4627"), val = tensor([1, 1, 1024])]; + tensor var_4623_cast_fp16 = transpose(perm = var_4623_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_147")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_4627, x = var_4623_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_4632 = const()[name = string("op_4632"), val = tensor([0, 2, 1])]; + string var_4648_pad_type_0 = const()[name = string("op_4648_pad_type_0"), val = string("valid")]; + int32 var_4648_groups_0 = const()[name = string("op_4648_groups_0"), val = int32(1)]; + tensor var_4648_strides_0 = const()[name = string("op_4648_strides_0"), val = tensor([1])]; + tensor var_4648_pad_0 = const()[name = string("op_4648_pad_0"), val = tensor([0, 0])]; + tensor var_4648_dilations_0 = const()[name = string("op_4648_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457040896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457925696))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4633_cast_fp16 = transpose(perm = var_4632, x = attn_output_45_cast_fp16)[name = string("transpose_146")]; + tensor var_4648_cast_fp16 = conv(dilations = var_4648_dilations_0, groups = var_4648_groups_0, pad = var_4648_pad_0, pad_type = var_4648_pad_type_0, strides = var_4648_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_4633_cast_fp16)[name = string("op_4648_cast_fp16")]; + tensor var_4652 = const()[name = string("op_4652"), val = tensor([0, 2, 1])]; + int32 var_4663 = const()[name = string("op_4663"), val = int32(-1)]; + fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_77_cast_fp16 = transpose(perm = var_4652, x = var_4648_cast_fp16)[name = string("transpose_145")]; + tensor var_4665_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_4665_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_4663, interleave = input_91_interleave_0, values = (hidden_states_77_cast_fp16, var_4665_cast_fp16))[name = string("input_91_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_4660_to_fp16 = const()[name = string("op_4660_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4660_to_fp16, x = input_91_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; + tensor var_4679_to_fp16 = const()[name = string("op_4679_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457962624)))]; + tensor attn_output_49_cast_fp16 = mul(x = normed_111_cast_fp16, y = var_4679_to_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + int32 var_4692 = const()[name = string("op_4692"), val = int32(-1)]; + fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4694_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = const_182_promoted_to_fp16)[name = string("op_4694_cast_fp16")]; + bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; + tensor input_93_cast_fp16 = concat(axis = var_4692, interleave = input_93_interleave_0, values = (hidden_states_79_cast_fp16, var_4694_cast_fp16))[name = string("input_93_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_4689_to_fp16 = const()[name = string("op_4689_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_4689_to_fp16, x = input_93_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; + tensor var_4708_to_fp16 = const()[name = string("op_4708_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457964992)))]; + tensor x_77_cast_fp16 = mul(x = normed_115_cast_fp16, y = var_4708_to_fp16)[name = string("x_77_cast_fp16")]; + tensor var_4720 = const()[name = string("op_4720"), val = tensor([0, 2, 1])]; + tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; + tensor var_4721_cast_fp16 = transpose(perm = var_4720, x = x_77_cast_fp16)[name = string("transpose_144")]; + tensor input_95_cast_fp16 = expand_dims(axes = input_95_axes_0, x = var_4721_cast_fp16)[name = string("input_95_cast_fp16")]; + string x_79_pad_type_0 = const()[name = string("x_79_pad_type_0"), val = string("valid")]; + tensor x_79_strides_0 = const()[name = string("x_79_strides_0"), val = tensor([1, 1])]; + tensor x_79_pad_0 = const()[name = string("x_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_79_dilations_0 = const()[name = string("x_79_dilations_0"), val = tensor([1, 1])]; + int32 x_79_groups_0 = const()[name = string("x_79_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457967360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463939392))))[name = string("model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_79_cast_fp16 = conv(dilations = x_79_dilations_0, groups = x_79_groups_0, pad = x_79_pad_0, pad_type = x_79_pad_type_0, strides = x_79_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("x_79_cast_fp16")]; + string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; + tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; + tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; + int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464160640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470132672))))[name = string("model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_9_cast_fp16 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("b_9_cast_fp16")]; + string var_4746_mode_0 = const()[name = string("op_4746_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4746_cast_fp16 = gelu(mode = var_4746_mode_0, x = x_79_cast_fp16)[name = string("op_4746_cast_fp16")]; + tensor input_97_cast_fp16 = mul(x = var_4746_cast_fp16, y = b_9_cast_fp16)[name = string("input_97_cast_fp16")]; + string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; + tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; + tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; + int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470353920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476325952))))[name = string("model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_9_cast_fp16 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_97_cast_fp16)[name = string("e_9_cast_fp16")]; + tensor var_4754_axes_0 = const()[name = string("op_4754_axes_0"), val = tensor([2])]; + tensor var_4754_cast_fp16 = squeeze(axes = var_4754_axes_0, x = e_9_cast_fp16)[name = string("op_4754_cast_fp16")]; + tensor var_4755 = const()[name = string("op_4755"), val = tensor([0, 2, 1])]; + int32 var_4766 = const()[name = string("op_4766"), val = int32(-1)]; + fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_81_cast_fp16 = transpose(perm = var_4755, x = var_4754_cast_fp16)[name = string("transpose_143")]; + tensor var_4768_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_186_promoted_to_fp16)[name = string("op_4768_cast_fp16")]; + bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; + tensor input_99_cast_fp16 = concat(axis = var_4766, interleave = input_99_interleave_0, values = (hidden_states_81_cast_fp16, var_4768_cast_fp16))[name = string("input_99_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_4763_to_fp16 = const()[name = string("op_4763_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_4763_to_fp16, x = input_99_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_119_cast_fp16 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119_cast_fp16")]; + tensor var_4782_to_fp16 = const()[name = string("op_4782_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476362880)))]; + tensor hidden_states_83_cast_fp16 = mul(x = normed_119_cast_fp16, y = var_4782_to_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor hidden_states_85_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; + int32 var_4794_axis_0 = const()[name = string("op_4794_axis_0"), val = int32(1)]; + int32 var_4794_batch_dims_0 = const()[name = string("op_4794_batch_dims_0"), val = int32(0)]; + bool var_4794_validate_indices_0 = const()[name = string("op_4794_validate_indices_0"), val = bool(false)]; + tensor var_4786_to_fp16 = const()[name = string("op_4786_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476365248)))]; + tensor var_4794_cast_fp16_cast_uint16 = gather(axis = var_4794_axis_0, batch_dims = var_4794_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_4794_validate_indices_0, x = var_4786_to_fp16)[name = string("op_4794_cast_fp16_cast_uint16")]; + tensor var_4799 = const()[name = string("op_4799"), val = tensor([1, 1, 1, -1])]; + tensor sin_21_cast_fp16 = reshape(shape = var_4799, x = var_4794_cast_fp16_cast_uint16)[name = string("sin_21_cast_fp16")]; + int32 var_4809_axis_0 = const()[name = string("op_4809_axis_0"), val = int32(1)]; + int32 var_4809_batch_dims_0 = const()[name = string("op_4809_batch_dims_0"), val = int32(0)]; + bool var_4809_validate_indices_0 = const()[name = string("op_4809_validate_indices_0"), val = bool(false)]; + tensor var_4801_to_fp16 = const()[name = string("op_4801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480559616)))]; + tensor var_4809_cast_fp16_cast_uint16 = gather(axis = var_4809_axis_0, batch_dims = var_4809_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_4809_validate_indices_0, x = var_4801_to_fp16)[name = string("op_4809_cast_fp16_cast_uint16")]; + tensor var_4814 = const()[name = string("op_4814"), val = tensor([1, 1, 1, -1])]; + tensor cos_21_cast_fp16 = reshape(shape = var_4814, x = var_4809_cast_fp16_cast_uint16)[name = string("cos_21_cast_fp16")]; + int32 var_4835 = const()[name = string("op_4835"), val = int32(-1)]; + fp16 const_190_promoted_to_fp16 = const()[name = string("const_190_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4837_cast_fp16 = mul(x = hidden_states_85_cast_fp16, y = const_190_promoted_to_fp16)[name = string("op_4837_cast_fp16")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101_cast_fp16 = concat(axis = var_4835, interleave = input_101_interleave_0, values = (hidden_states_85_cast_fp16, var_4837_cast_fp16))[name = string("input_101_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_4832_to_fp16 = const()[name = string("op_4832_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_4832_to_fp16, x = input_101_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_123_cast_fp16 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123_cast_fp16")]; + tensor var_4851_to_fp16 = const()[name = string("op_4851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484753984)))]; + tensor hidden_states_87_cast_fp16 = mul(x = normed_123_cast_fp16, y = var_4851_to_fp16)[name = string("hidden_states_87_cast_fp16")]; + tensor var_4856 = const()[name = string("op_4856"), val = tensor([0, 2, 1])]; + tensor var_4859_axes_0 = const()[name = string("op_4859_axes_0"), val = tensor([2])]; + tensor var_4857_cast_fp16 = transpose(perm = var_4856, x = hidden_states_87_cast_fp16)[name = string("transpose_142")]; + tensor var_4859_cast_fp16 = expand_dims(axes = var_4859_axes_0, x = var_4857_cast_fp16)[name = string("op_4859_cast_fp16")]; + string var_4875_pad_type_0 = const()[name = string("op_4875_pad_type_0"), val = string("valid")]; + tensor var_4875_strides_0 = const()[name = string("op_4875_strides_0"), val = tensor([1, 1])]; + tensor var_4875_pad_0 = const()[name = string("op_4875_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4875_dilations_0 = const()[name = string("op_4875_dilations_0"), val = tensor([1, 1])]; + int32 var_4875_groups_0 = const()[name = string("op_4875_groups_0"), val = int32(1)]; + tensor var_4875 = conv(dilations = var_4875_dilations_0, groups = var_4875_groups_0, pad = var_4875_pad_0, pad_type = var_4875_pad_type_0, strides = var_4875_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_4859_cast_fp16)[name = string("op_4875")]; + tensor var_4880 = const()[name = string("op_4880"), val = tensor([1, 4, 1, 256])]; + tensor var_4881 = reshape(shape = var_4880, x = var_4875)[name = string("op_4881")]; + string var_4897_pad_type_0 = const()[name = string("op_4897_pad_type_0"), val = string("valid")]; + tensor var_4897_strides_0 = const()[name = string("op_4897_strides_0"), val = tensor([1, 1])]; + tensor var_4897_pad_0 = const()[name = string("op_4897_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4897_dilations_0 = const()[name = string("op_4897_dilations_0"), val = tensor([1, 1])]; + int32 var_4897_groups_0 = const()[name = string("op_4897_groups_0"), val = int32(1)]; + tensor var_4897 = conv(dilations = var_4897_dilations_0, groups = var_4897_groups_0, pad = var_4897_pad_0, pad_type = var_4897_pad_type_0, strides = var_4897_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_4859_cast_fp16)[name = string("op_4897")]; + tensor var_4902 = const()[name = string("op_4902"), val = tensor([1, 1, 1, 256])]; + tensor var_4903 = reshape(shape = var_4902, x = var_4897)[name = string("op_4903")]; + string var_4919_pad_type_0 = const()[name = string("op_4919_pad_type_0"), val = string("valid")]; + tensor var_4919_strides_0 = const()[name = string("op_4919_strides_0"), val = tensor([1, 1])]; + tensor var_4919_pad_0 = const()[name = string("op_4919_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4919_dilations_0 = const()[name = string("op_4919_dilations_0"), val = tensor([1, 1])]; + int32 var_4919_groups_0 = const()[name = string("op_4919_groups_0"), val = int32(1)]; + tensor var_4919 = conv(dilations = var_4919_dilations_0, groups = var_4919_groups_0, pad = var_4919_pad_0, pad_type = var_4919_pad_type_0, strides = var_4919_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_4859_cast_fp16)[name = string("op_4919")]; + tensor var_4924 = const()[name = string("op_4924"), val = tensor([1, 1, 1, 256])]; + tensor var_4925 = reshape(shape = var_4924, x = var_4919)[name = string("op_4925")]; + int32 var_4940 = const()[name = string("op_4940"), val = int32(-1)]; + fp16 const_194_promoted = const()[name = string("const_194_promoted"), val = fp16(-0x1p+0)]; + tensor var_4942 = mul(x = var_4881, y = const_194_promoted)[name = string("op_4942")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105 = concat(axis = var_4940, interleave = input_105_interleave_0, values = (var_4881, var_4942))[name = string("input_105")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_4937_to_fp16 = const()[name = string("op_4937_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_4937_to_fp16, x = input_105)[name = string("normed_125_cast_fp16")]; + tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_127 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127")]; + tensor var_4956_to_fp16 = const()[name = string("op_4956_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484756352)))]; + tensor q_11_cast_fp16 = mul(x = normed_127, y = var_4956_to_fp16)[name = string("q_11_cast_fp16")]; + int32 var_4967 = const()[name = string("op_4967"), val = int32(-1)]; + fp16 const_198_promoted = const()[name = string("const_198_promoted"), val = fp16(-0x1p+0)]; + tensor var_4969 = mul(x = var_4903, y = const_198_promoted)[name = string("op_4969")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107 = concat(axis = var_4967, interleave = input_107_interleave_0, values = (var_4903, var_4969))[name = string("input_107")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_4964_to_fp16 = const()[name = string("op_4964_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_4964_to_fp16, x = input_107)[name = string("normed_129_cast_fp16")]; + tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_131 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131")]; + tensor var_4983_to_fp16 = const()[name = string("op_4983_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484756928)))]; + tensor k_11_cast_fp16 = mul(x = normed_131, y = var_4983_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_4985_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_21_cast_fp16)[name = string("op_4985_cast_fp16")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; + fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5006_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_5006_cast_fp16")]; + int32 var_5008 = const()[name = string("op_5008"), val = int32(-1)]; + bool var_5009_interleave_0 = const()[name = string("op_5009_interleave_0"), val = bool(false)]; + tensor var_5009_cast_fp16 = concat(axis = var_5008, interleave = var_5009_interleave_0, values = (var_5006_cast_fp16, x1_21_cast_fp16))[name = string("op_5009_cast_fp16")]; + tensor var_5010_cast_fp16 = mul(x = var_5009_cast_fp16, y = sin_21_cast_fp16)[name = string("op_5010_cast_fp16")]; + tensor query_states_21_cast_fp16 = add(x = var_4985_cast_fp16, y = var_5010_cast_fp16)[name = string("query_states_21_cast_fp16")]; + tensor var_5013_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_21_cast_fp16)[name = string("op_5013_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; + fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5034_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_207_promoted_to_fp16)[name = string("op_5034_cast_fp16")]; + int32 var_5036 = const()[name = string("op_5036"), val = int32(-1)]; + bool var_5037_interleave_0 = const()[name = string("op_5037_interleave_0"), val = bool(false)]; + tensor var_5037_cast_fp16 = concat(axis = var_5036, interleave = var_5037_interleave_0, values = (var_5034_cast_fp16, x1_23_cast_fp16))[name = string("op_5037_cast_fp16")]; + tensor var_5038_cast_fp16 = mul(x = var_5037_cast_fp16, y = sin_21_cast_fp16)[name = string("op_5038_cast_fp16")]; + tensor key_states_21_cast_fp16 = add(x = var_5013_cast_fp16, y = var_5038_cast_fp16)[name = string("key_states_21_cast_fp16")]; + int32 var_5042 = const()[name = string("op_5042"), val = int32(1)]; + tensor var_5043 = add(x = current_pos, y = var_5042)[name = string("op_5043")]; + tensor read_state_1 = read_state(input = model_model_kv_cache_global)[name = string("read_state_1")]; + tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_53 = const()[name = string("expand_dims_53"), val = tensor([0])]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([1])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_50, expand_dims_51, current_pos, expand_dims_53))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_54, concat_23_values1_0, var_5043, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_1_stride_0, update = key_states_21_cast_fp16, x = read_state_1)[name = string("model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_62_write_state")]; + tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_62")]; + tensor expand_dims_56 = const()[name = string("expand_dims_56"), val = tensor([4])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_59 = const()[name = string("expand_dims_59"), val = tensor([0])]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_56, expand_dims_57, current_pos, expand_dims_59))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_60, concat_27_values1_0, var_5043, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_2_stride_0, update = var_4925, x = coreml_update_state_62)[name = string("model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_63_write_state")]; + tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_63")]; + tensor var_5093_begin_0 = const()[name = string("op_5093_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5093_end_0 = const()[name = string("op_5093_end_0"), val = tensor([1, 1, 4096, 256])]; + tensor var_5093_end_mask_0 = const()[name = string("op_5093_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5093_cast_fp16 = slice_by_index(begin = var_5093_begin_0, end = var_5093_end_0, end_mask = var_5093_end_mask_0, x = coreml_update_state_63)[name = string("op_5093_cast_fp16")]; + tensor var_5100_begin_0 = const()[name = string("op_5100_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_5100_end_0 = const()[name = string("op_5100_end_0"), val = tensor([5, 1, 4096, 256])]; + tensor var_5100_end_mask_0 = const()[name = string("op_5100_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5100_cast_fp16 = slice_by_index(begin = var_5100_begin_0, end = var_5100_end_0, end_mask = var_5100_end_mask_0, x = coreml_update_state_63)[name = string("op_5100_cast_fp16")]; + tensor var_5137 = const()[name = string("op_5137"), val = tensor([1, 4, 1, 1])]; + tensor x_85_cast_fp16 = tile(reps = var_5137, x = var_5093_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_5157 = const()[name = string("op_5157"), val = tensor([1, 4, 1, 1])]; + tensor x_91_cast_fp16 = tile(reps = var_5157, x = var_5100_cast_fp16)[name = string("x_91_cast_fp16")]; + bool var_5184_transpose_x_1 = const()[name = string("op_5184_transpose_x_1"), val = bool(false)]; + bool var_5184_transpose_y_1 = const()[name = string("op_5184_transpose_y_1"), val = bool(true)]; + tensor var_5184 = matmul(transpose_x = var_5184_transpose_x_1, transpose_y = var_5184_transpose_y_1, x = query_states_21_cast_fp16, y = x_85_cast_fp16)[name = string("op_5184")]; + fp16 var_5185_to_fp16 = const()[name = string("op_5185_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_31_cast_fp16 = mul(x = var_5184, y = var_5185_to_fp16)[name = string("attn_weights_31_cast_fp16")]; + tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; + int32 var_5220 = const()[name = string("op_5220"), val = int32(-1)]; + tensor attn_weights_35_cast_fp16 = softmax(axis = var_5220, x = attn_weights_33_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; + bool attn_output_51_transpose_x_0 = const()[name = string("attn_output_51_transpose_x_0"), val = bool(false)]; + bool attn_output_51_transpose_y_0 = const()[name = string("attn_output_51_transpose_y_0"), val = bool(false)]; + tensor attn_output_51_cast_fp16 = matmul(transpose_x = attn_output_51_transpose_x_0, transpose_y = attn_output_51_transpose_y_0, x = attn_weights_35_cast_fp16, y = x_91_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_5231_perm_0 = const()[name = string("op_5231_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5235 = const()[name = string("op_5235"), val = tensor([1, 1, 1024])]; + tensor var_5231_cast_fp16 = transpose(perm = var_5231_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_141")]; + tensor attn_output_55_cast_fp16 = reshape(shape = var_5235, x = var_5231_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_5240 = const()[name = string("op_5240"), val = tensor([0, 2, 1])]; + string var_5256_pad_type_0 = const()[name = string("op_5256_pad_type_0"), val = string("valid")]; + int32 var_5256_groups_0 = const()[name = string("op_5256_groups_0"), val = int32(1)]; + tensor var_5256_strides_0 = const()[name = string("op_5256_strides_0"), val = tensor([1])]; + tensor var_5256_pad_0 = const()[name = string("op_5256_pad_0"), val = tensor([0, 0])]; + tensor var_5256_dilations_0 = const()[name = string("op_5256_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484757504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485642304))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5241_cast_fp16 = transpose(perm = var_5240, x = attn_output_55_cast_fp16)[name = string("transpose_140")]; + tensor var_5256_cast_fp16 = conv(dilations = var_5256_dilations_0, groups = var_5256_groups_0, pad = var_5256_pad_0, pad_type = var_5256_pad_type_0, strides = var_5256_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_5241_cast_fp16)[name = string("op_5256_cast_fp16")]; + tensor var_5260 = const()[name = string("op_5260"), val = tensor([0, 2, 1])]; + int32 var_5271 = const()[name = string("op_5271"), val = int32(-1)]; + fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_93_cast_fp16 = transpose(perm = var_5260, x = var_5256_cast_fp16)[name = string("transpose_139")]; + tensor var_5273_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_5273_cast_fp16")]; + bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; + tensor input_111_cast_fp16 = concat(axis = var_5271, interleave = input_111_interleave_0, values = (hidden_states_93_cast_fp16, var_5273_cast_fp16))[name = string("input_111_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_5268_to_fp16 = const()[name = string("op_5268_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5268_to_fp16, x = input_111_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_135_cast_fp16 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135_cast_fp16")]; + tensor var_5287_to_fp16 = const()[name = string("op_5287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485679232)))]; + tensor attn_output_59_cast_fp16 = mul(x = normed_135_cast_fp16, y = var_5287_to_fp16)[name = string("attn_output_59_cast_fp16")]; + tensor hidden_states_95_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; + int32 var_5300 = const()[name = string("op_5300"), val = int32(-1)]; + fp16 const_220_promoted_to_fp16 = const()[name = string("const_220_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5302_cast_fp16 = mul(x = hidden_states_95_cast_fp16, y = const_220_promoted_to_fp16)[name = string("op_5302_cast_fp16")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113_cast_fp16 = concat(axis = var_5300, interleave = input_113_interleave_0, values = (hidden_states_95_cast_fp16, var_5302_cast_fp16))[name = string("input_113_cast_fp16")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_5297_to_fp16 = const()[name = string("op_5297_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5297_to_fp16, x = input_113_cast_fp16)[name = string("normed_137_cast_fp16")]; + tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_139_cast_fp16 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139_cast_fp16")]; + tensor var_5316_to_fp16 = const()[name = string("op_5316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485681600)))]; + tensor x_93_cast_fp16 = mul(x = normed_139_cast_fp16, y = var_5316_to_fp16)[name = string("x_93_cast_fp16")]; + tensor var_5328 = const()[name = string("op_5328"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_5329_cast_fp16 = transpose(perm = var_5328, x = x_93_cast_fp16)[name = string("transpose_138")]; + tensor input_115_cast_fp16 = expand_dims(axes = input_115_axes_0, x = var_5329_cast_fp16)[name = string("input_115_cast_fp16")]; + string x_95_pad_type_0 = const()[name = string("x_95_pad_type_0"), val = string("valid")]; + tensor x_95_strides_0 = const()[name = string("x_95_strides_0"), val = tensor([1, 1])]; + tensor x_95_pad_0 = const()[name = string("x_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_95_dilations_0 = const()[name = string("x_95_dilations_0"), val = tensor([1, 1])]; + int32 x_95_groups_0 = const()[name = string("x_95_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485683968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491656000))))[name = string("model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_95_cast_fp16 = conv(dilations = x_95_dilations_0, groups = x_95_groups_0, pad = x_95_pad_0, pad_type = x_95_pad_type_0, strides = x_95_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("x_95_cast_fp16")]; + string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; + tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; + tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; + int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491877248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497849280))))[name = string("model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_11_cast_fp16 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("b_11_cast_fp16")]; + string var_5354_mode_0 = const()[name = string("op_5354_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_5354_cast_fp16 = gelu(mode = var_5354_mode_0, x = x_95_cast_fp16)[name = string("op_5354_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = var_5354_cast_fp16, y = b_11_cast_fp16)[name = string("input_117_cast_fp16")]; + string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; + tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; + tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; + int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498070528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504042560))))[name = string("model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_11_cast_fp16 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("e_11_cast_fp16")]; + tensor var_5362_axes_0 = const()[name = string("op_5362_axes_0"), val = tensor([2])]; + tensor var_5362_cast_fp16 = squeeze(axes = var_5362_axes_0, x = e_11_cast_fp16)[name = string("op_5362_cast_fp16")]; + tensor var_5363 = const()[name = string("op_5363"), val = tensor([0, 2, 1])]; + int32 var_5374 = const()[name = string("op_5374"), val = int32(-1)]; + fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_97_cast_fp16 = transpose(perm = var_5363, x = var_5362_cast_fp16)[name = string("transpose_137")]; + tensor var_5376_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_5376_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_5374, interleave = input_119_interleave_0, values = (hidden_states_97_cast_fp16, var_5376_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_5371_to_fp16 = const()[name = string("op_5371_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_5371_to_fp16, x = input_119_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; + tensor var_5390_to_fp16 = const()[name = string("op_5390_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504079488)))]; + tensor hidden_states_99_cast_fp16 = mul(x = normed_143_cast_fp16, y = var_5390_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + int32 var_5441 = const()[name = string("op_5441"), val = int32(-1)]; + fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5443_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_228_promoted_to_fp16)[name = string("op_5443_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_5441, interleave = input_121_interleave_0, values = (hidden_states_101_cast_fp16, var_5443_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_5438_to_fp16, x = input_121_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; + tensor var_5457_to_fp16 = const()[name = string("op_5457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504081856)))]; + tensor hidden_states_103_cast_fp16 = mul(x = normed_147_cast_fp16, y = var_5457_to_fp16)[name = string("hidden_states_103_cast_fp16")]; + tensor var_5462 = const()[name = string("op_5462"), val = tensor([0, 2, 1])]; + tensor var_5465_axes_0 = const()[name = string("op_5465_axes_0"), val = tensor([2])]; + tensor var_5463_cast_fp16 = transpose(perm = var_5462, x = hidden_states_103_cast_fp16)[name = string("transpose_136")]; + tensor var_5465_cast_fp16 = expand_dims(axes = var_5465_axes_0, x = var_5463_cast_fp16)[name = string("op_5465_cast_fp16")]; + string var_5481_pad_type_0 = const()[name = string("op_5481_pad_type_0"), val = string("valid")]; + tensor var_5481_strides_0 = const()[name = string("op_5481_strides_0"), val = tensor([1, 1])]; + tensor var_5481_pad_0 = const()[name = string("op_5481_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5481_dilations_0 = const()[name = string("op_5481_dilations_0"), val = tensor([1, 1])]; + int32 var_5481_groups_0 = const()[name = string("op_5481_groups_0"), val = int32(1)]; + tensor var_5481 = conv(dilations = var_5481_dilations_0, groups = var_5481_groups_0, pad = var_5481_pad_0, pad_type = var_5481_pad_type_0, strides = var_5481_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_5465_cast_fp16)[name = string("op_5481")]; + tensor var_5486 = const()[name = string("op_5486"), val = tensor([1, 4, 1, 256])]; + tensor var_5487 = reshape(shape = var_5486, x = var_5481)[name = string("op_5487")]; + string var_5503_pad_type_0 = const()[name = string("op_5503_pad_type_0"), val = string("valid")]; + tensor var_5503_strides_0 = const()[name = string("op_5503_strides_0"), val = tensor([1, 1])]; + tensor var_5503_pad_0 = const()[name = string("op_5503_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5503_dilations_0 = const()[name = string("op_5503_dilations_0"), val = tensor([1, 1])]; + int32 var_5503_groups_0 = const()[name = string("op_5503_groups_0"), val = int32(1)]; + tensor var_5503 = conv(dilations = var_5503_dilations_0, groups = var_5503_groups_0, pad = var_5503_pad_0, pad_type = var_5503_pad_type_0, strides = var_5503_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_5465_cast_fp16)[name = string("op_5503")]; + tensor var_5508 = const()[name = string("op_5508"), val = tensor([1, 1, 1, 256])]; + tensor var_5509 = reshape(shape = var_5508, x = var_5503)[name = string("op_5509")]; + string var_5525_pad_type_0 = const()[name = string("op_5525_pad_type_0"), val = string("valid")]; + tensor var_5525_strides_0 = const()[name = string("op_5525_strides_0"), val = tensor([1, 1])]; + tensor var_5525_pad_0 = const()[name = string("op_5525_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5525_dilations_0 = const()[name = string("op_5525_dilations_0"), val = tensor([1, 1])]; + int32 var_5525_groups_0 = const()[name = string("op_5525_groups_0"), val = int32(1)]; + tensor var_5525 = conv(dilations = var_5525_dilations_0, groups = var_5525_groups_0, pad = var_5525_pad_0, pad_type = var_5525_pad_type_0, strides = var_5525_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_5465_cast_fp16)[name = string("op_5525")]; + tensor var_5530 = const()[name = string("op_5530"), val = tensor([1, 1, 1, 256])]; + tensor var_5531 = reshape(shape = var_5530, x = var_5525)[name = string("op_5531")]; + int32 var_5546 = const()[name = string("op_5546"), val = int32(-1)]; + fp16 const_232_promoted = const()[name = string("const_232_promoted"), val = fp16(-0x1p+0)]; + tensor var_5548 = mul(x = var_5487, y = const_232_promoted)[name = string("op_5548")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_5546, interleave = input_125_interleave_0, values = (var_5487, var_5548))[name = string("input_125")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_5543_to_fp16 = const()[name = string("op_5543_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_5543_to_fp16, x = input_125)[name = string("normed_149_cast_fp16")]; + tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; + tensor var_5562_to_fp16 = const()[name = string("op_5562_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504084224)))]; + tensor q_13_cast_fp16 = mul(x = normed_151, y = var_5562_to_fp16)[name = string("q_13_cast_fp16")]; + int32 var_5573 = const()[name = string("op_5573"), val = int32(-1)]; + fp16 const_236_promoted = const()[name = string("const_236_promoted"), val = fp16(-0x1p+0)]; + tensor var_5575 = mul(x = var_5509, y = const_236_promoted)[name = string("op_5575")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127 = concat(axis = var_5573, interleave = input_127_interleave_0, values = (var_5509, var_5575))[name = string("input_127")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_5570_to_fp16 = const()[name = string("op_5570_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_5570_to_fp16, x = input_127)[name = string("normed_153_cast_fp16")]; + tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; + tensor var_5589_to_fp16 = const()[name = string("op_5589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504084800)))]; + tensor k_13_cast_fp16 = mul(x = normed_155, y = var_5589_to_fp16)[name = string("k_13_cast_fp16")]; + tensor var_5591_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5591_cast_fp16")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; + fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5612_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_242_promoted_to_fp16)[name = string("op_5612_cast_fp16")]; + int32 var_5614 = const()[name = string("op_5614"), val = int32(-1)]; + bool var_5615_interleave_0 = const()[name = string("op_5615_interleave_0"), val = bool(false)]; + tensor var_5615_cast_fp16 = concat(axis = var_5614, interleave = var_5615_interleave_0, values = (var_5612_cast_fp16, x1_25_cast_fp16))[name = string("op_5615_cast_fp16")]; + tensor var_5616_cast_fp16 = mul(x = var_5615_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5616_cast_fp16")]; + tensor query_states_25_cast_fp16 = add(x = var_5591_cast_fp16, y = var_5616_cast_fp16)[name = string("query_states_25_cast_fp16")]; + tensor var_5619_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5619_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; + fp16 const_245_promoted_to_fp16 = const()[name = string("const_245_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5640_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_245_promoted_to_fp16)[name = string("op_5640_cast_fp16")]; + int32 var_5642 = const()[name = string("op_5642"), val = int32(-1)]; + bool var_5643_interleave_0 = const()[name = string("op_5643_interleave_0"), val = bool(false)]; + tensor var_5643_cast_fp16 = concat(axis = var_5642, interleave = var_5643_interleave_0, values = (var_5640_cast_fp16, x1_27_cast_fp16))[name = string("op_5643_cast_fp16")]; + tensor var_5644_cast_fp16 = mul(x = var_5643_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5644_cast_fp16")]; + tensor key_states_25_cast_fp16 = add(x = var_5619_cast_fp16, y = var_5644_cast_fp16)[name = string("key_states_25_cast_fp16")]; + tensor key_slice_11_begin_0 = const()[name = string("key_slice_11_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor key_slice_11_end_0 = const()[name = string("key_slice_11_end_0"), val = tensor([6, 1, 512, 256])]; + tensor key_slice_11_end_mask_0 = const()[name = string("key_slice_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_11_cast_fp16 = slice_by_index(begin = key_slice_11_begin_0, end = key_slice_11_end_0, end_mask = key_slice_11_end_mask_0, x = coreml_update_state_61)[name = string("key_slice_11_cast_fp16")]; + tensor key_tail_11_begin_0 = const()[name = string("key_tail_11_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_11_end_0 = const()[name = string("key_tail_11_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_11_cast_fp16 = slice_by_index(begin = key_tail_11_begin_0, end = key_tail_11_end_0, x = key_slice_11_cast_fp16)[name = string("key_tail_11_cast_fp16")]; + int32 var_5657 = const()[name = string("op_5657"), val = int32(2)]; + bool shifted_key_11_interleave_0 = const()[name = string("shifted_key_11_interleave_0"), val = bool(false)]; + tensor shifted_key_11_cast_fp16 = concat(axis = var_5657, interleave = shifted_key_11_interleave_0, values = (key_tail_11_cast_fp16, key_states_25_cast_fp16))[name = string("shifted_key_11_cast_fp16")]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([5, 0, 0, 0])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([6, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_28, begin_mask = model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0, end = concat_29, end_mask = model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_11_stride_0, update = shifted_key_11_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_64")]; + tensor value_slice_11_begin_0 = const()[name = string("value_slice_11_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor value_slice_11_end_0 = const()[name = string("value_slice_11_end_0"), val = tensor([28, 1, 512, 256])]; + tensor value_slice_11_end_mask_0 = const()[name = string("value_slice_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_11_cast_fp16 = slice_by_index(begin = value_slice_11_begin_0, end = value_slice_11_end_0, end_mask = value_slice_11_end_mask_0, x = coreml_update_state_64)[name = string("value_slice_11_cast_fp16")]; + tensor value_tail_11_begin_0 = const()[name = string("value_tail_11_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_11_end_0 = const()[name = string("value_tail_11_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_11_cast_fp16 = slice_by_index(begin = value_tail_11_begin_0, end = value_tail_11_end_0, x = value_slice_11_cast_fp16)[name = string("value_tail_11_cast_fp16")]; + int32 var_5691 = const()[name = string("op_5691"), val = int32(2)]; + bool shifted_value_11_interleave_0 = const()[name = string("shifted_value_11_interleave_0"), val = bool(false)]; + tensor shifted_value_11_cast_fp16 = concat(axis = var_5691, interleave = shifted_value_11_interleave_0, values = (value_tail_11_cast_fp16, var_5531))[name = string("shifted_value_11_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([27, 0, 0, 0])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([28, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_12_stride_0, update = shifted_value_11_cast_fp16, x = coreml_update_state_64)[name = string("model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_65")]; + tensor var_5719_begin_0 = const()[name = string("op_5719_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_5719_end_0 = const()[name = string("op_5719_end_0"), val = tensor([6, 1, 512, 256])]; + tensor var_5719_end_mask_0 = const()[name = string("op_5719_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5719_cast_fp16 = slice_by_index(begin = var_5719_begin_0, end = var_5719_end_0, end_mask = var_5719_end_mask_0, x = coreml_update_state_65)[name = string("op_5719_cast_fp16")]; + tensor var_5726_begin_0 = const()[name = string("op_5726_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor var_5726_end_0 = const()[name = string("op_5726_end_0"), val = tensor([28, 1, 512, 256])]; + tensor var_5726_end_mask_0 = const()[name = string("op_5726_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5726_cast_fp16 = slice_by_index(begin = var_5726_begin_0, end = var_5726_end_0, end_mask = var_5726_end_mask_0, x = coreml_update_state_65)[name = string("op_5726_cast_fp16")]; + tensor var_5763 = const()[name = string("op_5763"), val = tensor([1, 4, 1, 1])]; + tensor x_101_cast_fp16 = tile(reps = var_5763, x = var_5719_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_5783 = const()[name = string("op_5783"), val = tensor([1, 4, 1, 1])]; + tensor x_107_cast_fp16 = tile(reps = var_5783, x = var_5726_cast_fp16)[name = string("x_107_cast_fp16")]; + bool var_5810_transpose_x_1 = const()[name = string("op_5810_transpose_x_1"), val = bool(false)]; + bool var_5810_transpose_y_1 = const()[name = string("op_5810_transpose_y_1"), val = bool(true)]; + tensor var_5810 = matmul(transpose_x = var_5810_transpose_x_1, transpose_y = var_5810_transpose_y_1, x = query_states_25_cast_fp16, y = x_101_cast_fp16)[name = string("op_5810")]; + fp16 var_5811_to_fp16 = const()[name = string("op_5811_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_37_cast_fp16 = mul(x = var_5810, y = var_5811_to_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = var_2105)[name = string("attn_weights_39_cast_fp16")]; + int32 var_5846 = const()[name = string("op_5846"), val = int32(-1)]; + tensor attn_weights_41_cast_fp16 = softmax(axis = var_5846, x = attn_weights_39_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_41_cast_fp16, y = x_107_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_5857_perm_0 = const()[name = string("op_5857_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5861 = const()[name = string("op_5861"), val = tensor([1, 1, 1024])]; + tensor var_5857_cast_fp16 = transpose(perm = var_5857_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_135")]; + tensor attn_output_65_cast_fp16 = reshape(shape = var_5861, x = var_5857_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_5866 = const()[name = string("op_5866"), val = tensor([0, 2, 1])]; + string var_5882_pad_type_0 = const()[name = string("op_5882_pad_type_0"), val = string("valid")]; + int32 var_5882_groups_0 = const()[name = string("op_5882_groups_0"), val = int32(1)]; + tensor var_5882_strides_0 = const()[name = string("op_5882_strides_0"), val = tensor([1])]; + tensor var_5882_pad_0 = const()[name = string("op_5882_pad_0"), val = tensor([0, 0])]; + tensor var_5882_dilations_0 = const()[name = string("op_5882_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504085376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504970176))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5867_cast_fp16 = transpose(perm = var_5866, x = attn_output_65_cast_fp16)[name = string("transpose_134")]; + tensor var_5882_cast_fp16 = conv(dilations = var_5882_dilations_0, groups = var_5882_groups_0, pad = var_5882_pad_0, pad_type = var_5882_pad_type_0, strides = var_5882_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_5867_cast_fp16)[name = string("op_5882_cast_fp16")]; + tensor var_5886 = const()[name = string("op_5886"), val = tensor([0, 2, 1])]; + int32 var_5897 = const()[name = string("op_5897"), val = int32(-1)]; + fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_109_cast_fp16 = transpose(perm = var_5886, x = var_5882_cast_fp16)[name = string("transpose_133")]; + tensor var_5899_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_5899_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_5897, interleave = input_131_interleave_0, values = (hidden_states_109_cast_fp16, var_5899_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_5894_to_fp16 = const()[name = string("op_5894_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_5894_to_fp16, x = input_131_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; + tensor var_5913_to_fp16 = const()[name = string("op_5913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505007104)))]; + tensor attn_output_69_cast_fp16 = mul(x = normed_159_cast_fp16, y = var_5913_to_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + int32 var_5926 = const()[name = string("op_5926"), val = int32(-1)]; + fp16 const_258_promoted_to_fp16 = const()[name = string("const_258_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5928_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_258_promoted_to_fp16)[name = string("op_5928_cast_fp16")]; + bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; + tensor input_133_cast_fp16 = concat(axis = var_5926, interleave = input_133_interleave_0, values = (hidden_states_111_cast_fp16, var_5928_cast_fp16))[name = string("input_133_cast_fp16")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_5923_to_fp16 = const()[name = string("op_5923_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_5923_to_fp16, x = input_133_cast_fp16)[name = string("normed_161_cast_fp16")]; + tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; + tensor var_5942_to_fp16 = const()[name = string("op_5942_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505009472)))]; + tensor x_109_cast_fp16 = mul(x = normed_163_cast_fp16, y = var_5942_to_fp16)[name = string("x_109_cast_fp16")]; + tensor var_5954 = const()[name = string("op_5954"), val = tensor([0, 2, 1])]; + tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; + tensor var_5955_cast_fp16 = transpose(perm = var_5954, x = x_109_cast_fp16)[name = string("transpose_132")]; + tensor input_135_cast_fp16 = expand_dims(axes = input_135_axes_0, x = var_5955_cast_fp16)[name = string("input_135_cast_fp16")]; + string x_111_pad_type_0 = const()[name = string("x_111_pad_type_0"), val = string("valid")]; + tensor x_111_strides_0 = const()[name = string("x_111_strides_0"), val = tensor([1, 1])]; + tensor x_111_pad_0 = const()[name = string("x_111_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_111_dilations_0 = const()[name = string("x_111_dilations_0"), val = tensor([1, 1])]; + int32 x_111_groups_0 = const()[name = string("x_111_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505011840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510983872))))[name = string("model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_111_cast_fp16 = conv(dilations = x_111_dilations_0, groups = x_111_groups_0, pad = x_111_pad_0, pad_type = x_111_pad_type_0, strides = x_111_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("x_111_cast_fp16")]; + string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; + tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; + tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; + int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511205120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517177152))))[name = string("model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_13_cast_fp16 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("b_13_cast_fp16")]; + string var_5980_mode_0 = const()[name = string("op_5980_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_5980_cast_fp16 = gelu(mode = var_5980_mode_0, x = x_111_cast_fp16)[name = string("op_5980_cast_fp16")]; + tensor input_137_cast_fp16 = mul(x = var_5980_cast_fp16, y = b_13_cast_fp16)[name = string("input_137_cast_fp16")]; + string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; + tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; + tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; + int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523370432))))[name = string("model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_13_cast_fp16 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_137_cast_fp16)[name = string("e_13_cast_fp16")]; + tensor var_5988_axes_0 = const()[name = string("op_5988_axes_0"), val = tensor([2])]; + tensor var_5988_cast_fp16 = squeeze(axes = var_5988_axes_0, x = e_13_cast_fp16)[name = string("op_5988_cast_fp16")]; + tensor var_5989 = const()[name = string("op_5989"), val = tensor([0, 2, 1])]; + int32 var_6000 = const()[name = string("op_6000"), val = int32(-1)]; + fp16 const_262_promoted_to_fp16 = const()[name = string("const_262_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_113_cast_fp16 = transpose(perm = var_5989, x = var_5988_cast_fp16)[name = string("transpose_131")]; + tensor var_6002_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = const_262_promoted_to_fp16)[name = string("op_6002_cast_fp16")]; + bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; + tensor input_139_cast_fp16 = concat(axis = var_6000, interleave = input_139_interleave_0, values = (hidden_states_113_cast_fp16, var_6002_cast_fp16))[name = string("input_139_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_5997_to_fp16 = const()[name = string("op_5997_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_5997_to_fp16, x = input_139_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_167_cast_fp16 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167_cast_fp16")]; + tensor var_6016_to_fp16 = const()[name = string("op_6016_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523407360)))]; + tensor hidden_states_115_cast_fp16 = mul(x = normed_167_cast_fp16, y = var_6016_to_fp16)[name = string("hidden_states_115_cast_fp16")]; + tensor hidden_states_117_cast_fp16 = add(x = hidden_states_111_cast_fp16, y = hidden_states_115_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; + int32 var_6067 = const()[name = string("op_6067"), val = int32(-1)]; + fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6069_cast_fp16 = mul(x = hidden_states_117_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_6069_cast_fp16")]; + bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; + tensor input_141_cast_fp16 = concat(axis = var_6067, interleave = input_141_interleave_0, values = (hidden_states_117_cast_fp16, var_6069_cast_fp16))[name = string("input_141_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_6064_to_fp16 = const()[name = string("op_6064_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_6064_to_fp16, x = input_141_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_171_cast_fp16 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171_cast_fp16")]; + tensor var_6083_to_fp16 = const()[name = string("op_6083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523409728)))]; + tensor hidden_states_119_cast_fp16 = mul(x = normed_171_cast_fp16, y = var_6083_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor var_6088 = const()[name = string("op_6088"), val = tensor([0, 2, 1])]; + tensor var_6091_axes_0 = const()[name = string("op_6091_axes_0"), val = tensor([2])]; + tensor var_6089_cast_fp16 = transpose(perm = var_6088, x = hidden_states_119_cast_fp16)[name = string("transpose_130")]; + tensor var_6091_cast_fp16 = expand_dims(axes = var_6091_axes_0, x = var_6089_cast_fp16)[name = string("op_6091_cast_fp16")]; + string var_6107_pad_type_0 = const()[name = string("op_6107_pad_type_0"), val = string("valid")]; + tensor var_6107_strides_0 = const()[name = string("op_6107_strides_0"), val = tensor([1, 1])]; + tensor var_6107_pad_0 = const()[name = string("op_6107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6107_dilations_0 = const()[name = string("op_6107_dilations_0"), val = tensor([1, 1])]; + int32 var_6107_groups_0 = const()[name = string("op_6107_groups_0"), val = int32(1)]; + tensor var_6107 = conv(dilations = var_6107_dilations_0, groups = var_6107_groups_0, pad = var_6107_pad_0, pad_type = var_6107_pad_type_0, strides = var_6107_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_6091_cast_fp16)[name = string("op_6107")]; + tensor var_6112 = const()[name = string("op_6112"), val = tensor([1, 4, 1, 256])]; + tensor var_6113 = reshape(shape = var_6112, x = var_6107)[name = string("op_6113")]; + string var_6129_pad_type_0 = const()[name = string("op_6129_pad_type_0"), val = string("valid")]; + tensor var_6129_strides_0 = const()[name = string("op_6129_strides_0"), val = tensor([1, 1])]; + tensor var_6129_pad_0 = const()[name = string("op_6129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6129_dilations_0 = const()[name = string("op_6129_dilations_0"), val = tensor([1, 1])]; + int32 var_6129_groups_0 = const()[name = string("op_6129_groups_0"), val = int32(1)]; + tensor var_6129 = conv(dilations = var_6129_dilations_0, groups = var_6129_groups_0, pad = var_6129_pad_0, pad_type = var_6129_pad_type_0, strides = var_6129_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_6091_cast_fp16)[name = string("op_6129")]; + tensor var_6134 = const()[name = string("op_6134"), val = tensor([1, 1, 1, 256])]; + tensor var_6135 = reshape(shape = var_6134, x = var_6129)[name = string("op_6135")]; + string var_6151_pad_type_0 = const()[name = string("op_6151_pad_type_0"), val = string("valid")]; + tensor var_6151_strides_0 = const()[name = string("op_6151_strides_0"), val = tensor([1, 1])]; + tensor var_6151_pad_0 = const()[name = string("op_6151_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6151_dilations_0 = const()[name = string("op_6151_dilations_0"), val = tensor([1, 1])]; + int32 var_6151_groups_0 = const()[name = string("op_6151_groups_0"), val = int32(1)]; + tensor var_6151 = conv(dilations = var_6151_dilations_0, groups = var_6151_groups_0, pad = var_6151_pad_0, pad_type = var_6151_pad_type_0, strides = var_6151_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_6091_cast_fp16)[name = string("op_6151")]; + tensor var_6156 = const()[name = string("op_6156"), val = tensor([1, 1, 1, 256])]; + tensor var_6157 = reshape(shape = var_6156, x = var_6151)[name = string("op_6157")]; + int32 var_6172 = const()[name = string("op_6172"), val = int32(-1)]; + fp16 const_270_promoted = const()[name = string("const_270_promoted"), val = fp16(-0x1p+0)]; + tensor var_6174 = mul(x = var_6113, y = const_270_promoted)[name = string("op_6174")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145 = concat(axis = var_6172, interleave = input_145_interleave_0, values = (var_6113, var_6174))[name = string("input_145")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_6169_to_fp16 = const()[name = string("op_6169_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6169_to_fp16, x = input_145)[name = string("normed_173_cast_fp16")]; + tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_175 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175")]; + tensor var_6188_to_fp16 = const()[name = string("op_6188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523412096)))]; + tensor q_15_cast_fp16 = mul(x = normed_175, y = var_6188_to_fp16)[name = string("q_15_cast_fp16")]; + int32 var_6199 = const()[name = string("op_6199"), val = int32(-1)]; + fp16 const_274_promoted = const()[name = string("const_274_promoted"), val = fp16(-0x1p+0)]; + tensor var_6201 = mul(x = var_6135, y = const_274_promoted)[name = string("op_6201")]; + bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; + tensor input_147 = concat(axis = var_6199, interleave = input_147_interleave_0, values = (var_6135, var_6201))[name = string("input_147")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_6196_to_fp16 = const()[name = string("op_6196_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_6196_to_fp16, x = input_147)[name = string("normed_177_cast_fp16")]; + tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_179 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179")]; + tensor var_6215_to_fp16 = const()[name = string("op_6215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523412672)))]; + tensor k_15_cast_fp16 = mul(x = normed_179, y = var_6215_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_6217_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6217_cast_fp16")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; + fp16 const_280_promoted_to_fp16 = const()[name = string("const_280_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6238_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_280_promoted_to_fp16)[name = string("op_6238_cast_fp16")]; + int32 var_6240 = const()[name = string("op_6240"), val = int32(-1)]; + bool var_6241_interleave_0 = const()[name = string("op_6241_interleave_0"), val = bool(false)]; + tensor var_6241_cast_fp16 = concat(axis = var_6240, interleave = var_6241_interleave_0, values = (var_6238_cast_fp16, x1_29_cast_fp16))[name = string("op_6241_cast_fp16")]; + tensor var_6242_cast_fp16 = mul(x = var_6241_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6242_cast_fp16")]; + tensor query_states_29_cast_fp16 = add(x = var_6217_cast_fp16, y = var_6242_cast_fp16)[name = string("query_states_29_cast_fp16")]; + tensor var_6245_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6245_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; + fp16 const_283_promoted_to_fp16 = const()[name = string("const_283_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6266_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_283_promoted_to_fp16)[name = string("op_6266_cast_fp16")]; + int32 var_6268 = const()[name = string("op_6268"), val = int32(-1)]; + bool var_6269_interleave_0 = const()[name = string("op_6269_interleave_0"), val = bool(false)]; + tensor var_6269_cast_fp16 = concat(axis = var_6268, interleave = var_6269_interleave_0, values = (var_6266_cast_fp16, x1_31_cast_fp16))[name = string("op_6269_cast_fp16")]; + tensor var_6270_cast_fp16 = mul(x = var_6269_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6270_cast_fp16")]; + tensor key_states_29_cast_fp16 = add(x = var_6245_cast_fp16, y = var_6270_cast_fp16)[name = string("key_states_29_cast_fp16")]; + tensor key_slice_13_begin_0 = const()[name = string("key_slice_13_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor key_slice_13_end_0 = const()[name = string("key_slice_13_end_0"), val = tensor([7, 1, 512, 256])]; + tensor key_slice_13_end_mask_0 = const()[name = string("key_slice_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_13_cast_fp16 = slice_by_index(begin = key_slice_13_begin_0, end = key_slice_13_end_0, end_mask = key_slice_13_end_mask_0, x = coreml_update_state_65)[name = string("key_slice_13_cast_fp16")]; + tensor key_tail_13_begin_0 = const()[name = string("key_tail_13_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_13_end_0 = const()[name = string("key_tail_13_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_13_cast_fp16 = slice_by_index(begin = key_tail_13_begin_0, end = key_tail_13_end_0, x = key_slice_13_cast_fp16)[name = string("key_tail_13_cast_fp16")]; + int32 var_6283 = const()[name = string("op_6283"), val = int32(2)]; + bool shifted_key_13_interleave_0 = const()[name = string("shifted_key_13_interleave_0"), val = bool(false)]; + tensor shifted_key_13_cast_fp16 = concat(axis = var_6283, interleave = shifted_key_13_interleave_0, values = (key_tail_13_cast_fp16, key_states_29_cast_fp16))[name = string("shifted_key_13_cast_fp16")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([6, 0, 0, 0])]; + tensor concat_33 = const()[name = string("concat_33"), val = tensor([7, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_32, begin_mask = model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0, end = concat_33, end_mask = model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_13_stride_0, update = shifted_key_13_cast_fp16, x = coreml_update_state_65)[name = string("model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_66")]; + tensor value_slice_13_begin_0 = const()[name = string("value_slice_13_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor value_slice_13_end_0 = const()[name = string("value_slice_13_end_0"), val = tensor([29, 1, 512, 256])]; + tensor value_slice_13_end_mask_0 = const()[name = string("value_slice_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_13_cast_fp16 = slice_by_index(begin = value_slice_13_begin_0, end = value_slice_13_end_0, end_mask = value_slice_13_end_mask_0, x = coreml_update_state_66)[name = string("value_slice_13_cast_fp16")]; + tensor value_tail_13_begin_0 = const()[name = string("value_tail_13_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_13_end_0 = const()[name = string("value_tail_13_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_13_cast_fp16 = slice_by_index(begin = value_tail_13_begin_0, end = value_tail_13_end_0, x = value_slice_13_cast_fp16)[name = string("value_tail_13_cast_fp16")]; + int32 var_6317 = const()[name = string("op_6317"), val = int32(2)]; + bool shifted_value_13_interleave_0 = const()[name = string("shifted_value_13_interleave_0"), val = bool(false)]; + tensor shifted_value_13_cast_fp16 = concat(axis = var_6317, interleave = shifted_value_13_interleave_0, values = (value_tail_13_cast_fp16, var_6157))[name = string("shifted_value_13_cast_fp16")]; + tensor concat_34 = const()[name = string("concat_34"), val = tensor([28, 0, 0, 0])]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([29, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_14_stride_0, update = shifted_value_13_cast_fp16, x = coreml_update_state_66)[name = string("model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_67")]; + tensor var_6345_begin_0 = const()[name = string("op_6345_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_6345_end_0 = const()[name = string("op_6345_end_0"), val = tensor([7, 1, 512, 256])]; + tensor var_6345_end_mask_0 = const()[name = string("op_6345_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6345_cast_fp16 = slice_by_index(begin = var_6345_begin_0, end = var_6345_end_0, end_mask = var_6345_end_mask_0, x = coreml_update_state_67)[name = string("op_6345_cast_fp16")]; + tensor var_6352_begin_0 = const()[name = string("op_6352_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_6352_end_0 = const()[name = string("op_6352_end_0"), val = tensor([29, 1, 512, 256])]; + tensor var_6352_end_mask_0 = const()[name = string("op_6352_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6352_cast_fp16 = slice_by_index(begin = var_6352_begin_0, end = var_6352_end_0, end_mask = var_6352_end_mask_0, x = coreml_update_state_67)[name = string("op_6352_cast_fp16")]; + tensor var_6389 = const()[name = string("op_6389"), val = tensor([1, 4, 1, 1])]; + tensor x_117_cast_fp16 = tile(reps = var_6389, x = var_6345_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_6409 = const()[name = string("op_6409"), val = tensor([1, 4, 1, 1])]; + tensor x_123_cast_fp16 = tile(reps = var_6409, x = var_6352_cast_fp16)[name = string("x_123_cast_fp16")]; + bool var_6436_transpose_x_1 = const()[name = string("op_6436_transpose_x_1"), val = bool(false)]; + bool var_6436_transpose_y_1 = const()[name = string("op_6436_transpose_y_1"), val = bool(true)]; + tensor var_6436 = matmul(transpose_x = var_6436_transpose_x_1, transpose_y = var_6436_transpose_y_1, x = query_states_29_cast_fp16, y = x_117_cast_fp16)[name = string("op_6436")]; + fp16 var_6437_to_fp16 = const()[name = string("op_6437_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_43_cast_fp16 = mul(x = var_6436, y = var_6437_to_fp16)[name = string("attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = var_2105)[name = string("attn_weights_45_cast_fp16")]; + int32 var_6472 = const()[name = string("op_6472"), val = int32(-1)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_6472, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_71_transpose_x_0 = const()[name = string("attn_output_71_transpose_x_0"), val = bool(false)]; + bool attn_output_71_transpose_y_0 = const()[name = string("attn_output_71_transpose_y_0"), val = bool(false)]; + tensor attn_output_71_cast_fp16 = matmul(transpose_x = attn_output_71_transpose_x_0, transpose_y = attn_output_71_transpose_y_0, x = attn_weights_47_cast_fp16, y = x_123_cast_fp16)[name = string("attn_output_71_cast_fp16")]; + tensor var_6483_perm_0 = const()[name = string("op_6483_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6487 = const()[name = string("op_6487"), val = tensor([1, 1, 1024])]; + tensor var_6483_cast_fp16 = transpose(perm = var_6483_perm_0, x = attn_output_71_cast_fp16)[name = string("transpose_129")]; + tensor attn_output_75_cast_fp16 = reshape(shape = var_6487, x = var_6483_cast_fp16)[name = string("attn_output_75_cast_fp16")]; + tensor var_6492 = const()[name = string("op_6492"), val = tensor([0, 2, 1])]; + string var_6508_pad_type_0 = const()[name = string("op_6508_pad_type_0"), val = string("valid")]; + int32 var_6508_groups_0 = const()[name = string("op_6508_groups_0"), val = int32(1)]; + tensor var_6508_strides_0 = const()[name = string("op_6508_strides_0"), val = tensor([1])]; + tensor var_6508_pad_0 = const()[name = string("op_6508_pad_0"), val = tensor([0, 0])]; + tensor var_6508_dilations_0 = const()[name = string("op_6508_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523413248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524298048))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6493_cast_fp16 = transpose(perm = var_6492, x = attn_output_75_cast_fp16)[name = string("transpose_128")]; + tensor var_6508_cast_fp16 = conv(dilations = var_6508_dilations_0, groups = var_6508_groups_0, pad = var_6508_pad_0, pad_type = var_6508_pad_type_0, strides = var_6508_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_6493_cast_fp16)[name = string("op_6508_cast_fp16")]; + tensor var_6512 = const()[name = string("op_6512"), val = tensor([0, 2, 1])]; + int32 var_6523 = const()[name = string("op_6523"), val = int32(-1)]; + fp16 const_292_promoted_to_fp16 = const()[name = string("const_292_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_125_cast_fp16 = transpose(perm = var_6512, x = var_6508_cast_fp16)[name = string("transpose_127")]; + tensor var_6525_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = const_292_promoted_to_fp16)[name = string("op_6525_cast_fp16")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151_cast_fp16 = concat(axis = var_6523, interleave = input_151_interleave_0, values = (hidden_states_125_cast_fp16, var_6525_cast_fp16))[name = string("input_151_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_6520_to_fp16 = const()[name = string("op_6520_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_6520_to_fp16, x = input_151_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_183_cast_fp16 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183_cast_fp16")]; + tensor var_6539_to_fp16 = const()[name = string("op_6539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524334976)))]; + tensor attn_output_79_cast_fp16 = mul(x = normed_183_cast_fp16, y = var_6539_to_fp16)[name = string("attn_output_79_cast_fp16")]; + tensor hidden_states_127_cast_fp16 = add(x = hidden_states_117_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + int32 var_6552 = const()[name = string("op_6552"), val = int32(-1)]; + fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6554_cast_fp16 = mul(x = hidden_states_127_cast_fp16, y = const_296_promoted_to_fp16)[name = string("op_6554_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_6552, interleave = input_153_interleave_0, values = (hidden_states_127_cast_fp16, var_6554_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_6549_to_fp16 = const()[name = string("op_6549_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_6549_to_fp16, x = input_153_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_187_cast_fp16 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187_cast_fp16")]; + tensor var_6568_to_fp16 = const()[name = string("op_6568_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524337344)))]; + tensor x_125_cast_fp16 = mul(x = normed_187_cast_fp16, y = var_6568_to_fp16)[name = string("x_125_cast_fp16")]; + tensor var_6580 = const()[name = string("op_6580"), val = tensor([0, 2, 1])]; + tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; + tensor var_6581_cast_fp16 = transpose(perm = var_6580, x = x_125_cast_fp16)[name = string("transpose_126")]; + tensor input_155_cast_fp16 = expand_dims(axes = input_155_axes_0, x = var_6581_cast_fp16)[name = string("input_155_cast_fp16")]; + string x_127_pad_type_0 = const()[name = string("x_127_pad_type_0"), val = string("valid")]; + tensor x_127_strides_0 = const()[name = string("x_127_strides_0"), val = tensor([1, 1])]; + tensor x_127_pad_0 = const()[name = string("x_127_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_127_dilations_0 = const()[name = string("x_127_dilations_0"), val = tensor([1, 1])]; + int32 x_127_groups_0 = const()[name = string("x_127_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524339712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530311744))))[name = string("model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_127_cast_fp16 = conv(dilations = x_127_dilations_0, groups = x_127_groups_0, pad = x_127_pad_0, pad_type = x_127_pad_type_0, strides = x_127_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("x_127_cast_fp16")]; + string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; + tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; + tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; + int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530532992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536505024))))[name = string("model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_15_cast_fp16 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("b_15_cast_fp16")]; + string var_6606_mode_0 = const()[name = string("op_6606_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_6606_cast_fp16 = gelu(mode = var_6606_mode_0, x = x_127_cast_fp16)[name = string("op_6606_cast_fp16")]; + tensor input_157_cast_fp16 = mul(x = var_6606_cast_fp16, y = b_15_cast_fp16)[name = string("input_157_cast_fp16")]; + string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; + tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; + tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; + int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536726272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542698304))))[name = string("model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_15_cast_fp16 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_157_cast_fp16)[name = string("e_15_cast_fp16")]; + tensor var_6614_axes_0 = const()[name = string("op_6614_axes_0"), val = tensor([2])]; + tensor var_6614_cast_fp16 = squeeze(axes = var_6614_axes_0, x = e_15_cast_fp16)[name = string("op_6614_cast_fp16")]; + tensor var_6615 = const()[name = string("op_6615"), val = tensor([0, 2, 1])]; + int32 var_6626 = const()[name = string("op_6626"), val = int32(-1)]; + fp16 const_300_promoted_to_fp16 = const()[name = string("const_300_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_129_cast_fp16 = transpose(perm = var_6615, x = var_6614_cast_fp16)[name = string("transpose_125")]; + tensor var_6628_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_300_promoted_to_fp16)[name = string("op_6628_cast_fp16")]; + bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; + tensor input_159_cast_fp16 = concat(axis = var_6626, interleave = input_159_interleave_0, values = (hidden_states_129_cast_fp16, var_6628_cast_fp16))[name = string("input_159_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_6623_to_fp16 = const()[name = string("op_6623_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_6623_to_fp16, x = input_159_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; + tensor var_6642_to_fp16 = const()[name = string("op_6642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542735232)))]; + tensor hidden_states_131_cast_fp16 = mul(x = normed_191_cast_fp16, y = var_6642_to_fp16)[name = string("hidden_states_131_cast_fp16")]; + tensor hidden_states_133_cast_fp16 = add(x = hidden_states_127_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; + int32 var_6693 = const()[name = string("op_6693"), val = int32(-1)]; + fp16 const_304_promoted_to_fp16 = const()[name = string("const_304_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6695_cast_fp16 = mul(x = hidden_states_133_cast_fp16, y = const_304_promoted_to_fp16)[name = string("op_6695_cast_fp16")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161_cast_fp16 = concat(axis = var_6693, interleave = input_161_interleave_0, values = (hidden_states_133_cast_fp16, var_6695_cast_fp16))[name = string("input_161_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_6690_to_fp16 = const()[name = string("op_6690_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_6690_to_fp16, x = input_161_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; + tensor var_6709_to_fp16 = const()[name = string("op_6709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542737600)))]; + tensor hidden_states_135_cast_fp16 = mul(x = normed_195_cast_fp16, y = var_6709_to_fp16)[name = string("hidden_states_135_cast_fp16")]; + tensor var_6714 = const()[name = string("op_6714"), val = tensor([0, 2, 1])]; + tensor var_6717_axes_0 = const()[name = string("op_6717_axes_0"), val = tensor([2])]; + tensor var_6715_cast_fp16 = transpose(perm = var_6714, x = hidden_states_135_cast_fp16)[name = string("transpose_124")]; + tensor var_6717_cast_fp16 = expand_dims(axes = var_6717_axes_0, x = var_6715_cast_fp16)[name = string("op_6717_cast_fp16")]; + string var_6733_pad_type_0 = const()[name = string("op_6733_pad_type_0"), val = string("valid")]; + tensor var_6733_strides_0 = const()[name = string("op_6733_strides_0"), val = tensor([1, 1])]; + tensor var_6733_pad_0 = const()[name = string("op_6733_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6733_dilations_0 = const()[name = string("op_6733_dilations_0"), val = tensor([1, 1])]; + int32 var_6733_groups_0 = const()[name = string("op_6733_groups_0"), val = int32(1)]; + tensor var_6733 = conv(dilations = var_6733_dilations_0, groups = var_6733_groups_0, pad = var_6733_pad_0, pad_type = var_6733_pad_type_0, strides = var_6733_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_6717_cast_fp16)[name = string("op_6733")]; + tensor var_6738 = const()[name = string("op_6738"), val = tensor([1, 4, 1, 256])]; + tensor var_6739 = reshape(shape = var_6738, x = var_6733)[name = string("op_6739")]; + string var_6755_pad_type_0 = const()[name = string("op_6755_pad_type_0"), val = string("valid")]; + tensor var_6755_strides_0 = const()[name = string("op_6755_strides_0"), val = tensor([1, 1])]; + tensor var_6755_pad_0 = const()[name = string("op_6755_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6755_dilations_0 = const()[name = string("op_6755_dilations_0"), val = tensor([1, 1])]; + int32 var_6755_groups_0 = const()[name = string("op_6755_groups_0"), val = int32(1)]; + tensor var_6755 = conv(dilations = var_6755_dilations_0, groups = var_6755_groups_0, pad = var_6755_pad_0, pad_type = var_6755_pad_type_0, strides = var_6755_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_6717_cast_fp16)[name = string("op_6755")]; + tensor var_6760 = const()[name = string("op_6760"), val = tensor([1, 1, 1, 256])]; + tensor var_6761 = reshape(shape = var_6760, x = var_6755)[name = string("op_6761")]; + string var_6777_pad_type_0 = const()[name = string("op_6777_pad_type_0"), val = string("valid")]; + tensor var_6777_strides_0 = const()[name = string("op_6777_strides_0"), val = tensor([1, 1])]; + tensor var_6777_pad_0 = const()[name = string("op_6777_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6777_dilations_0 = const()[name = string("op_6777_dilations_0"), val = tensor([1, 1])]; + int32 var_6777_groups_0 = const()[name = string("op_6777_groups_0"), val = int32(1)]; + tensor var_6777 = conv(dilations = var_6777_dilations_0, groups = var_6777_groups_0, pad = var_6777_pad_0, pad_type = var_6777_pad_type_0, strides = var_6777_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_6717_cast_fp16)[name = string("op_6777")]; + tensor var_6782 = const()[name = string("op_6782"), val = tensor([1, 1, 1, 256])]; + tensor var_6783 = reshape(shape = var_6782, x = var_6777)[name = string("op_6783")]; + int32 var_6798 = const()[name = string("op_6798"), val = int32(-1)]; + fp16 const_308_promoted = const()[name = string("const_308_promoted"), val = fp16(-0x1p+0)]; + tensor var_6800 = mul(x = var_6739, y = const_308_promoted)[name = string("op_6800")]; + bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; + tensor input_165 = concat(axis = var_6798, interleave = input_165_interleave_0, values = (var_6739, var_6800))[name = string("input_165")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_6795_to_fp16 = const()[name = string("op_6795_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_6795_to_fp16, x = input_165)[name = string("normed_197_cast_fp16")]; + tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; + tensor var_6814_to_fp16 = const()[name = string("op_6814_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542739968)))]; + tensor q_17_cast_fp16 = mul(x = normed_199, y = var_6814_to_fp16)[name = string("q_17_cast_fp16")]; + int32 var_6825 = const()[name = string("op_6825"), val = int32(-1)]; + fp16 const_312_promoted = const()[name = string("const_312_promoted"), val = fp16(-0x1p+0)]; + tensor var_6827 = mul(x = var_6761, y = const_312_promoted)[name = string("op_6827")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167 = concat(axis = var_6825, interleave = input_167_interleave_0, values = (var_6761, var_6827))[name = string("input_167")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_6822_to_fp16 = const()[name = string("op_6822_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_6822_to_fp16, x = input_167)[name = string("normed_201_cast_fp16")]; + tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; + tensor var_6841_to_fp16 = const()[name = string("op_6841_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542740544)))]; + tensor k_17_cast_fp16 = mul(x = normed_203, y = var_6841_to_fp16)[name = string("k_17_cast_fp16")]; + tensor var_6843_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6843_cast_fp16")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; + fp16 const_318_promoted_to_fp16 = const()[name = string("const_318_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6864_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_318_promoted_to_fp16)[name = string("op_6864_cast_fp16")]; + int32 var_6866 = const()[name = string("op_6866"), val = int32(-1)]; + bool var_6867_interleave_0 = const()[name = string("op_6867_interleave_0"), val = bool(false)]; + tensor var_6867_cast_fp16 = concat(axis = var_6866, interleave = var_6867_interleave_0, values = (var_6864_cast_fp16, x1_33_cast_fp16))[name = string("op_6867_cast_fp16")]; + tensor var_6868_cast_fp16 = mul(x = var_6867_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6868_cast_fp16")]; + tensor query_states_33_cast_fp16 = add(x = var_6843_cast_fp16, y = var_6868_cast_fp16)[name = string("query_states_33_cast_fp16")]; + tensor var_6871_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6871_cast_fp16")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; + fp16 const_321_promoted_to_fp16 = const()[name = string("const_321_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6892_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_321_promoted_to_fp16)[name = string("op_6892_cast_fp16")]; + int32 var_6894 = const()[name = string("op_6894"), val = int32(-1)]; + bool var_6895_interleave_0 = const()[name = string("op_6895_interleave_0"), val = bool(false)]; + tensor var_6895_cast_fp16 = concat(axis = var_6894, interleave = var_6895_interleave_0, values = (var_6892_cast_fp16, x1_35_cast_fp16))[name = string("op_6895_cast_fp16")]; + tensor var_6896_cast_fp16 = mul(x = var_6895_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6896_cast_fp16")]; + tensor key_states_33_cast_fp16 = add(x = var_6871_cast_fp16, y = var_6896_cast_fp16)[name = string("key_states_33_cast_fp16")]; + tensor key_slice_15_begin_0 = const()[name = string("key_slice_15_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor key_slice_15_end_0 = const()[name = string("key_slice_15_end_0"), val = tensor([8, 1, 512, 256])]; + tensor key_slice_15_end_mask_0 = const()[name = string("key_slice_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_15_cast_fp16 = slice_by_index(begin = key_slice_15_begin_0, end = key_slice_15_end_0, end_mask = key_slice_15_end_mask_0, x = coreml_update_state_67)[name = string("key_slice_15_cast_fp16")]; + tensor key_tail_15_begin_0 = const()[name = string("key_tail_15_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_15_end_0 = const()[name = string("key_tail_15_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_15_cast_fp16 = slice_by_index(begin = key_tail_15_begin_0, end = key_tail_15_end_0, x = key_slice_15_cast_fp16)[name = string("key_tail_15_cast_fp16")]; + int32 var_6909 = const()[name = string("op_6909"), val = int32(2)]; + bool shifted_key_15_interleave_0 = const()[name = string("shifted_key_15_interleave_0"), val = bool(false)]; + tensor shifted_key_15_cast_fp16 = concat(axis = var_6909, interleave = shifted_key_15_interleave_0, values = (key_tail_15_cast_fp16, key_states_33_cast_fp16))[name = string("shifted_key_15_cast_fp16")]; + tensor concat_36 = const()[name = string("concat_36"), val = tensor([7, 0, 0, 0])]; + tensor concat_37 = const()[name = string("concat_37"), val = tensor([8, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_36, begin_mask = model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0, end = concat_37, end_mask = model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_15_stride_0, update = shifted_key_15_cast_fp16, x = coreml_update_state_67)[name = string("model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_68")]; + tensor value_slice_15_begin_0 = const()[name = string("value_slice_15_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor value_slice_15_end_0 = const()[name = string("value_slice_15_end_0"), val = tensor([30, 1, 512, 256])]; + tensor value_slice_15_end_mask_0 = const()[name = string("value_slice_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_15_cast_fp16 = slice_by_index(begin = value_slice_15_begin_0, end = value_slice_15_end_0, end_mask = value_slice_15_end_mask_0, x = coreml_update_state_68)[name = string("value_slice_15_cast_fp16")]; + tensor value_tail_15_begin_0 = const()[name = string("value_tail_15_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_15_end_0 = const()[name = string("value_tail_15_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_15_cast_fp16 = slice_by_index(begin = value_tail_15_begin_0, end = value_tail_15_end_0, x = value_slice_15_cast_fp16)[name = string("value_tail_15_cast_fp16")]; + int32 var_6943 = const()[name = string("op_6943"), val = int32(2)]; + bool shifted_value_15_interleave_0 = const()[name = string("shifted_value_15_interleave_0"), val = bool(false)]; + tensor shifted_value_15_cast_fp16 = concat(axis = var_6943, interleave = shifted_value_15_interleave_0, values = (value_tail_15_cast_fp16, var_6783))[name = string("shifted_value_15_cast_fp16")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([29, 0, 0, 0])]; + tensor concat_39 = const()[name = string("concat_39"), val = tensor([30, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_16_stride_0, update = shifted_value_15_cast_fp16, x = coreml_update_state_68)[name = string("model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_69")]; + tensor var_6971_begin_0 = const()[name = string("op_6971_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_6971_end_0 = const()[name = string("op_6971_end_0"), val = tensor([8, 1, 512, 256])]; + tensor var_6971_end_mask_0 = const()[name = string("op_6971_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6971_cast_fp16 = slice_by_index(begin = var_6971_begin_0, end = var_6971_end_0, end_mask = var_6971_end_mask_0, x = coreml_update_state_69)[name = string("op_6971_cast_fp16")]; + tensor var_6978_begin_0 = const()[name = string("op_6978_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_6978_end_0 = const()[name = string("op_6978_end_0"), val = tensor([30, 1, 512, 256])]; + tensor var_6978_end_mask_0 = const()[name = string("op_6978_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6978_cast_fp16 = slice_by_index(begin = var_6978_begin_0, end = var_6978_end_0, end_mask = var_6978_end_mask_0, x = coreml_update_state_69)[name = string("op_6978_cast_fp16")]; + tensor var_7015 = const()[name = string("op_7015"), val = tensor([1, 4, 1, 1])]; + tensor x_133_cast_fp16 = tile(reps = var_7015, x = var_6971_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_7035 = const()[name = string("op_7035"), val = tensor([1, 4, 1, 1])]; + tensor x_139_cast_fp16 = tile(reps = var_7035, x = var_6978_cast_fp16)[name = string("x_139_cast_fp16")]; + bool var_7062_transpose_x_1 = const()[name = string("op_7062_transpose_x_1"), val = bool(false)]; + bool var_7062_transpose_y_1 = const()[name = string("op_7062_transpose_y_1"), val = bool(true)]; + tensor var_7062 = matmul(transpose_x = var_7062_transpose_x_1, transpose_y = var_7062_transpose_y_1, x = query_states_33_cast_fp16, y = x_133_cast_fp16)[name = string("op_7062")]; + fp16 var_7063_to_fp16 = const()[name = string("op_7063_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_49_cast_fp16 = mul(x = var_7062, y = var_7063_to_fp16)[name = string("attn_weights_49_cast_fp16")]; + tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = var_2105)[name = string("attn_weights_51_cast_fp16")]; + int32 var_7098 = const()[name = string("op_7098"), val = int32(-1)]; + tensor attn_weights_53_cast_fp16 = softmax(axis = var_7098, x = attn_weights_51_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; + bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; + tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = attn_weights_53_cast_fp16, y = x_139_cast_fp16)[name = string("attn_output_81_cast_fp16")]; + tensor var_7109_perm_0 = const()[name = string("op_7109_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7113 = const()[name = string("op_7113"), val = tensor([1, 1, 1024])]; + tensor var_7109_cast_fp16 = transpose(perm = var_7109_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_123")]; + tensor attn_output_85_cast_fp16 = reshape(shape = var_7113, x = var_7109_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_7118 = const()[name = string("op_7118"), val = tensor([0, 2, 1])]; + string var_7134_pad_type_0 = const()[name = string("op_7134_pad_type_0"), val = string("valid")]; + int32 var_7134_groups_0 = const()[name = string("op_7134_groups_0"), val = int32(1)]; + tensor var_7134_strides_0 = const()[name = string("op_7134_strides_0"), val = tensor([1])]; + tensor var_7134_pad_0 = const()[name = string("op_7134_pad_0"), val = tensor([0, 0])]; + tensor var_7134_dilations_0 = const()[name = string("op_7134_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542741120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543625920))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7119_cast_fp16 = transpose(perm = var_7118, x = attn_output_85_cast_fp16)[name = string("transpose_122")]; + tensor var_7134_cast_fp16 = conv(dilations = var_7134_dilations_0, groups = var_7134_groups_0, pad = var_7134_pad_0, pad_type = var_7134_pad_type_0, strides = var_7134_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_7119_cast_fp16)[name = string("op_7134_cast_fp16")]; + tensor var_7138 = const()[name = string("op_7138"), val = tensor([0, 2, 1])]; + int32 var_7149 = const()[name = string("op_7149"), val = int32(-1)]; + fp16 const_330_promoted_to_fp16 = const()[name = string("const_330_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_141_cast_fp16 = transpose(perm = var_7138, x = var_7134_cast_fp16)[name = string("transpose_121")]; + tensor var_7151_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_330_promoted_to_fp16)[name = string("op_7151_cast_fp16")]; + bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; + tensor input_171_cast_fp16 = concat(axis = var_7149, interleave = input_171_interleave_0, values = (hidden_states_141_cast_fp16, var_7151_cast_fp16))[name = string("input_171_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_7146_to_fp16 = const()[name = string("op_7146_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_7146_to_fp16, x = input_171_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; + tensor var_7165_to_fp16 = const()[name = string("op_7165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543662848)))]; + tensor attn_output_89_cast_fp16 = mul(x = normed_207_cast_fp16, y = var_7165_to_fp16)[name = string("attn_output_89_cast_fp16")]; + tensor hidden_states_143_cast_fp16 = add(x = hidden_states_133_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; + int32 var_7178 = const()[name = string("op_7178"), val = int32(-1)]; + fp16 const_334_promoted_to_fp16 = const()[name = string("const_334_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7180_cast_fp16 = mul(x = hidden_states_143_cast_fp16, y = const_334_promoted_to_fp16)[name = string("op_7180_cast_fp16")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173_cast_fp16 = concat(axis = var_7178, interleave = input_173_interleave_0, values = (hidden_states_143_cast_fp16, var_7180_cast_fp16))[name = string("input_173_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_7175_to_fp16 = const()[name = string("op_7175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_7175_to_fp16, x = input_173_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; + tensor var_7194_to_fp16 = const()[name = string("op_7194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543665216)))]; + tensor x_141_cast_fp16 = mul(x = normed_211_cast_fp16, y = var_7194_to_fp16)[name = string("x_141_cast_fp16")]; + tensor var_7206 = const()[name = string("op_7206"), val = tensor([0, 2, 1])]; + tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; + tensor var_7207_cast_fp16 = transpose(perm = var_7206, x = x_141_cast_fp16)[name = string("transpose_120")]; + tensor input_175_cast_fp16 = expand_dims(axes = input_175_axes_0, x = var_7207_cast_fp16)[name = string("input_175_cast_fp16")]; + string x_143_pad_type_0 = const()[name = string("x_143_pad_type_0"), val = string("valid")]; + tensor x_143_strides_0 = const()[name = string("x_143_strides_0"), val = tensor([1, 1])]; + tensor x_143_pad_0 = const()[name = string("x_143_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_143_dilations_0 = const()[name = string("x_143_dilations_0"), val = tensor([1, 1])]; + int32 x_143_groups_0 = const()[name = string("x_143_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543667584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549639616))))[name = string("model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("x_143_cast_fp16")]; + string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; + tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; + tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; + int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549860864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555832896))))[name = string("model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_17_cast_fp16 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("b_17_cast_fp16")]; + string var_7232_mode_0 = const()[name = string("op_7232_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_7232_cast_fp16 = gelu(mode = var_7232_mode_0, x = x_143_cast_fp16)[name = string("op_7232_cast_fp16")]; + tensor input_177_cast_fp16 = mul(x = var_7232_cast_fp16, y = b_17_cast_fp16)[name = string("input_177_cast_fp16")]; + string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; + tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; + tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; + int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556054144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562026176))))[name = string("model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_17_cast_fp16 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("e_17_cast_fp16")]; + tensor var_7240_axes_0 = const()[name = string("op_7240_axes_0"), val = tensor([2])]; + tensor var_7240_cast_fp16 = squeeze(axes = var_7240_axes_0, x = e_17_cast_fp16)[name = string("op_7240_cast_fp16")]; + tensor var_7241 = const()[name = string("op_7241"), val = tensor([0, 2, 1])]; + int32 var_7252 = const()[name = string("op_7252"), val = int32(-1)]; + fp16 const_338_promoted_to_fp16 = const()[name = string("const_338_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_145_cast_fp16 = transpose(perm = var_7241, x = var_7240_cast_fp16)[name = string("transpose_119")]; + tensor var_7254_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = const_338_promoted_to_fp16)[name = string("op_7254_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_7252, interleave = input_179_interleave_0, values = (hidden_states_145_cast_fp16, var_7254_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_7249_to_fp16 = const()[name = string("op_7249_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_7249_to_fp16, x = input_179_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_215_cast_fp16 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215_cast_fp16")]; + tensor var_7268_to_fp16 = const()[name = string("op_7268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562063104)))]; + tensor hidden_states_147_cast_fp16 = mul(x = normed_215_cast_fp16, y = var_7268_to_fp16)[name = string("hidden_states_147_cast_fp16")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_143_cast_fp16, y = hidden_states_147_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + int32 var_7319 = const()[name = string("op_7319"), val = int32(-1)]; + fp16 const_342_promoted_to_fp16 = const()[name = string("const_342_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7321_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = const_342_promoted_to_fp16)[name = string("op_7321_cast_fp16")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181_cast_fp16 = concat(axis = var_7319, interleave = input_181_interleave_0, values = (hidden_states_149_cast_fp16, var_7321_cast_fp16))[name = string("input_181_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_7316_to_fp16 = const()[name = string("op_7316_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_7316_to_fp16, x = input_181_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_219_cast_fp16 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219_cast_fp16")]; + tensor var_7335_to_fp16 = const()[name = string("op_7335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562065472)))]; + tensor hidden_states_151_cast_fp16 = mul(x = normed_219_cast_fp16, y = var_7335_to_fp16)[name = string("hidden_states_151_cast_fp16")]; + tensor var_7340 = const()[name = string("op_7340"), val = tensor([0, 2, 1])]; + tensor var_7343_axes_0 = const()[name = string("op_7343_axes_0"), val = tensor([2])]; + tensor var_7341_cast_fp16 = transpose(perm = var_7340, x = hidden_states_151_cast_fp16)[name = string("transpose_118")]; + tensor var_7343_cast_fp16 = expand_dims(axes = var_7343_axes_0, x = var_7341_cast_fp16)[name = string("op_7343_cast_fp16")]; + string var_7359_pad_type_0 = const()[name = string("op_7359_pad_type_0"), val = string("valid")]; + tensor var_7359_strides_0 = const()[name = string("op_7359_strides_0"), val = tensor([1, 1])]; + tensor var_7359_pad_0 = const()[name = string("op_7359_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7359_dilations_0 = const()[name = string("op_7359_dilations_0"), val = tensor([1, 1])]; + int32 var_7359_groups_0 = const()[name = string("op_7359_groups_0"), val = int32(1)]; + tensor var_7359 = conv(dilations = var_7359_dilations_0, groups = var_7359_groups_0, pad = var_7359_pad_0, pad_type = var_7359_pad_type_0, strides = var_7359_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_7343_cast_fp16)[name = string("op_7359")]; + tensor var_7364 = const()[name = string("op_7364"), val = tensor([1, 4, 1, 256])]; + tensor var_7365 = reshape(shape = var_7364, x = var_7359)[name = string("op_7365")]; + string var_7381_pad_type_0 = const()[name = string("op_7381_pad_type_0"), val = string("valid")]; + tensor var_7381_strides_0 = const()[name = string("op_7381_strides_0"), val = tensor([1, 1])]; + tensor var_7381_pad_0 = const()[name = string("op_7381_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7381_dilations_0 = const()[name = string("op_7381_dilations_0"), val = tensor([1, 1])]; + int32 var_7381_groups_0 = const()[name = string("op_7381_groups_0"), val = int32(1)]; + tensor var_7381 = conv(dilations = var_7381_dilations_0, groups = var_7381_groups_0, pad = var_7381_pad_0, pad_type = var_7381_pad_type_0, strides = var_7381_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_7343_cast_fp16)[name = string("op_7381")]; + tensor var_7386 = const()[name = string("op_7386"), val = tensor([1, 1, 1, 256])]; + tensor var_7387 = reshape(shape = var_7386, x = var_7381)[name = string("op_7387")]; + string var_7403_pad_type_0 = const()[name = string("op_7403_pad_type_0"), val = string("valid")]; + tensor var_7403_strides_0 = const()[name = string("op_7403_strides_0"), val = tensor([1, 1])]; + tensor var_7403_pad_0 = const()[name = string("op_7403_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7403_dilations_0 = const()[name = string("op_7403_dilations_0"), val = tensor([1, 1])]; + int32 var_7403_groups_0 = const()[name = string("op_7403_groups_0"), val = int32(1)]; + tensor var_7403 = conv(dilations = var_7403_dilations_0, groups = var_7403_groups_0, pad = var_7403_pad_0, pad_type = var_7403_pad_type_0, strides = var_7403_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_7343_cast_fp16)[name = string("op_7403")]; + tensor var_7408 = const()[name = string("op_7408"), val = tensor([1, 1, 1, 256])]; + tensor var_7409 = reshape(shape = var_7408, x = var_7403)[name = string("op_7409")]; + int32 var_7424 = const()[name = string("op_7424"), val = int32(-1)]; + fp16 const_346_promoted = const()[name = string("const_346_promoted"), val = fp16(-0x1p+0)]; + tensor var_7426 = mul(x = var_7365, y = const_346_promoted)[name = string("op_7426")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_7424, interleave = input_185_interleave_0, values = (var_7365, var_7426))[name = string("input_185")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_7421_to_fp16 = const()[name = string("op_7421_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_7421_to_fp16, x = input_185)[name = string("normed_221_cast_fp16")]; + tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_223 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223")]; + tensor var_7440_to_fp16 = const()[name = string("op_7440_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562067840)))]; + tensor q_19_cast_fp16 = mul(x = normed_223, y = var_7440_to_fp16)[name = string("q_19_cast_fp16")]; + int32 var_7451 = const()[name = string("op_7451"), val = int32(-1)]; + fp16 const_350_promoted = const()[name = string("const_350_promoted"), val = fp16(-0x1p+0)]; + tensor var_7453 = mul(x = var_7387, y = const_350_promoted)[name = string("op_7453")]; + bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; + tensor input_187 = concat(axis = var_7451, interleave = input_187_interleave_0, values = (var_7387, var_7453))[name = string("input_187")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_7448_to_fp16 = const()[name = string("op_7448_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_7448_to_fp16, x = input_187)[name = string("normed_225_cast_fp16")]; + tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_227 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227")]; + tensor var_7467_to_fp16 = const()[name = string("op_7467_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562068416)))]; + tensor k_19_cast_fp16 = mul(x = normed_227, y = var_7467_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_7469_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7469_cast_fp16")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; + fp16 const_356_promoted_to_fp16 = const()[name = string("const_356_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7490_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_356_promoted_to_fp16)[name = string("op_7490_cast_fp16")]; + int32 var_7492 = const()[name = string("op_7492"), val = int32(-1)]; + bool var_7493_interleave_0 = const()[name = string("op_7493_interleave_0"), val = bool(false)]; + tensor var_7493_cast_fp16 = concat(axis = var_7492, interleave = var_7493_interleave_0, values = (var_7490_cast_fp16, x1_37_cast_fp16))[name = string("op_7493_cast_fp16")]; + tensor var_7494_cast_fp16 = mul(x = var_7493_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7494_cast_fp16")]; + tensor query_states_37_cast_fp16 = add(x = var_7469_cast_fp16, y = var_7494_cast_fp16)[name = string("query_states_37_cast_fp16")]; + tensor var_7497_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7497_cast_fp16")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; + fp16 const_359_promoted_to_fp16 = const()[name = string("const_359_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7518_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_359_promoted_to_fp16)[name = string("op_7518_cast_fp16")]; + int32 var_7520 = const()[name = string("op_7520"), val = int32(-1)]; + bool var_7521_interleave_0 = const()[name = string("op_7521_interleave_0"), val = bool(false)]; + tensor var_7521_cast_fp16 = concat(axis = var_7520, interleave = var_7521_interleave_0, values = (var_7518_cast_fp16, x1_39_cast_fp16))[name = string("op_7521_cast_fp16")]; + tensor var_7522_cast_fp16 = mul(x = var_7521_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7522_cast_fp16")]; + tensor key_states_37_cast_fp16 = add(x = var_7497_cast_fp16, y = var_7522_cast_fp16)[name = string("key_states_37_cast_fp16")]; + tensor key_slice_17_begin_0 = const()[name = string("key_slice_17_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor key_slice_17_end_0 = const()[name = string("key_slice_17_end_0"), val = tensor([9, 1, 512, 256])]; + tensor key_slice_17_end_mask_0 = const()[name = string("key_slice_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_17_cast_fp16 = slice_by_index(begin = key_slice_17_begin_0, end = key_slice_17_end_0, end_mask = key_slice_17_end_mask_0, x = coreml_update_state_69)[name = string("key_slice_17_cast_fp16")]; + tensor key_tail_17_begin_0 = const()[name = string("key_tail_17_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_17_end_0 = const()[name = string("key_tail_17_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_17_cast_fp16 = slice_by_index(begin = key_tail_17_begin_0, end = key_tail_17_end_0, x = key_slice_17_cast_fp16)[name = string("key_tail_17_cast_fp16")]; + int32 var_7535 = const()[name = string("op_7535"), val = int32(2)]; + bool shifted_key_17_interleave_0 = const()[name = string("shifted_key_17_interleave_0"), val = bool(false)]; + tensor shifted_key_17_cast_fp16 = concat(axis = var_7535, interleave = shifted_key_17_interleave_0, values = (key_tail_17_cast_fp16, key_states_37_cast_fp16))[name = string("shifted_key_17_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([8, 0, 0, 0])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([9, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_40, begin_mask = model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0, end = concat_41, end_mask = model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_17_stride_0, update = shifted_key_17_cast_fp16, x = coreml_update_state_69)[name = string("model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_70")]; + tensor value_slice_17_begin_0 = const()[name = string("value_slice_17_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor value_slice_17_end_0 = const()[name = string("value_slice_17_end_0"), val = tensor([31, 1, 512, 256])]; + tensor value_slice_17_end_mask_0 = const()[name = string("value_slice_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_17_cast_fp16 = slice_by_index(begin = value_slice_17_begin_0, end = value_slice_17_end_0, end_mask = value_slice_17_end_mask_0, x = coreml_update_state_70)[name = string("value_slice_17_cast_fp16")]; + tensor value_tail_17_begin_0 = const()[name = string("value_tail_17_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_17_end_0 = const()[name = string("value_tail_17_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_17_cast_fp16 = slice_by_index(begin = value_tail_17_begin_0, end = value_tail_17_end_0, x = value_slice_17_cast_fp16)[name = string("value_tail_17_cast_fp16")]; + int32 var_7569 = const()[name = string("op_7569"), val = int32(2)]; + bool shifted_value_17_interleave_0 = const()[name = string("shifted_value_17_interleave_0"), val = bool(false)]; + tensor shifted_value_17_cast_fp16 = concat(axis = var_7569, interleave = shifted_value_17_interleave_0, values = (value_tail_17_cast_fp16, var_7409))[name = string("shifted_value_17_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([30, 0, 0, 0])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([31, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_18_stride_0, update = shifted_value_17_cast_fp16, x = coreml_update_state_70)[name = string("model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_71")]; + tensor var_7597_begin_0 = const()[name = string("op_7597_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_7597_end_0 = const()[name = string("op_7597_end_0"), val = tensor([9, 1, 512, 256])]; + tensor var_7597_end_mask_0 = const()[name = string("op_7597_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7597_cast_fp16 = slice_by_index(begin = var_7597_begin_0, end = var_7597_end_0, end_mask = var_7597_end_mask_0, x = coreml_update_state_71)[name = string("op_7597_cast_fp16")]; + tensor var_7604_begin_0 = const()[name = string("op_7604_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_7604_end_0 = const()[name = string("op_7604_end_0"), val = tensor([31, 1, 512, 256])]; + tensor var_7604_end_mask_0 = const()[name = string("op_7604_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7604_cast_fp16 = slice_by_index(begin = var_7604_begin_0, end = var_7604_end_0, end_mask = var_7604_end_mask_0, x = coreml_update_state_71)[name = string("op_7604_cast_fp16")]; + tensor var_7641 = const()[name = string("op_7641"), val = tensor([1, 4, 1, 1])]; + tensor x_149_cast_fp16 = tile(reps = var_7641, x = var_7597_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_7661 = const()[name = string("op_7661"), val = tensor([1, 4, 1, 1])]; + tensor x_155_cast_fp16 = tile(reps = var_7661, x = var_7604_cast_fp16)[name = string("x_155_cast_fp16")]; + bool var_7688_transpose_x_1 = const()[name = string("op_7688_transpose_x_1"), val = bool(false)]; + bool var_7688_transpose_y_1 = const()[name = string("op_7688_transpose_y_1"), val = bool(true)]; + tensor var_7688 = matmul(transpose_x = var_7688_transpose_x_1, transpose_y = var_7688_transpose_y_1, x = query_states_37_cast_fp16, y = x_149_cast_fp16)[name = string("op_7688")]; + fp16 var_7689_to_fp16 = const()[name = string("op_7689_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_55_cast_fp16 = mul(x = var_7688, y = var_7689_to_fp16)[name = string("attn_weights_55_cast_fp16")]; + tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = var_2105)[name = string("attn_weights_57_cast_fp16")]; + int32 var_7724 = const()[name = string("op_7724"), val = int32(-1)]; + tensor attn_weights_59_cast_fp16 = softmax(axis = var_7724, x = attn_weights_57_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; + bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; + bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; + tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_59_cast_fp16, y = x_155_cast_fp16)[name = string("attn_output_91_cast_fp16")]; + tensor var_7735_perm_0 = const()[name = string("op_7735_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7739 = const()[name = string("op_7739"), val = tensor([1, 1, 1024])]; + tensor var_7735_cast_fp16 = transpose(perm = var_7735_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_117")]; + tensor attn_output_95_cast_fp16 = reshape(shape = var_7739, x = var_7735_cast_fp16)[name = string("attn_output_95_cast_fp16")]; + tensor var_7744 = const()[name = string("op_7744"), val = tensor([0, 2, 1])]; + string var_7760_pad_type_0 = const()[name = string("op_7760_pad_type_0"), val = string("valid")]; + int32 var_7760_groups_0 = const()[name = string("op_7760_groups_0"), val = int32(1)]; + tensor var_7760_strides_0 = const()[name = string("op_7760_strides_0"), val = tensor([1])]; + tensor var_7760_pad_0 = const()[name = string("op_7760_pad_0"), val = tensor([0, 0])]; + tensor var_7760_dilations_0 = const()[name = string("op_7760_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562068992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562953792))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7745_cast_fp16 = transpose(perm = var_7744, x = attn_output_95_cast_fp16)[name = string("transpose_116")]; + tensor var_7760_cast_fp16 = conv(dilations = var_7760_dilations_0, groups = var_7760_groups_0, pad = var_7760_pad_0, pad_type = var_7760_pad_type_0, strides = var_7760_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_7745_cast_fp16)[name = string("op_7760_cast_fp16")]; + tensor var_7764 = const()[name = string("op_7764"), val = tensor([0, 2, 1])]; + int32 var_7775 = const()[name = string("op_7775"), val = int32(-1)]; + fp16 const_368_promoted_to_fp16 = const()[name = string("const_368_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_157_cast_fp16 = transpose(perm = var_7764, x = var_7760_cast_fp16)[name = string("transpose_115")]; + tensor var_7777_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = const_368_promoted_to_fp16)[name = string("op_7777_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_7775, interleave = input_191_interleave_0, values = (hidden_states_157_cast_fp16, var_7777_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_7772_to_fp16 = const()[name = string("op_7772_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_7772_to_fp16, x = input_191_cast_fp16)[name = string("normed_229_cast_fp16")]; + tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_231_cast_fp16 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231_cast_fp16")]; + tensor var_7791_to_fp16 = const()[name = string("op_7791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562990720)))]; + tensor attn_output_99_cast_fp16 = mul(x = normed_231_cast_fp16, y = var_7791_to_fp16)[name = string("attn_output_99_cast_fp16")]; + tensor hidden_states_159_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; + int32 var_7804 = const()[name = string("op_7804"), val = int32(-1)]; + fp16 const_372_promoted_to_fp16 = const()[name = string("const_372_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7806_cast_fp16 = mul(x = hidden_states_159_cast_fp16, y = const_372_promoted_to_fp16)[name = string("op_7806_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_7804, interleave = input_193_interleave_0, values = (hidden_states_159_cast_fp16, var_7806_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_7801_to_fp16 = const()[name = string("op_7801_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_7801_to_fp16, x = input_193_cast_fp16)[name = string("normed_233_cast_fp16")]; + tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_235_cast_fp16 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235_cast_fp16")]; + tensor var_7820_to_fp16 = const()[name = string("op_7820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562993088)))]; + tensor x_157_cast_fp16 = mul(x = normed_235_cast_fp16, y = var_7820_to_fp16)[name = string("x_157_cast_fp16")]; + tensor var_7832 = const()[name = string("op_7832"), val = tensor([0, 2, 1])]; + tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; + tensor var_7833_cast_fp16 = transpose(perm = var_7832, x = x_157_cast_fp16)[name = string("transpose_114")]; + tensor input_195_cast_fp16 = expand_dims(axes = input_195_axes_0, x = var_7833_cast_fp16)[name = string("input_195_cast_fp16")]; + string x_159_pad_type_0 = const()[name = string("x_159_pad_type_0"), val = string("valid")]; + tensor x_159_strides_0 = const()[name = string("x_159_strides_0"), val = tensor([1, 1])]; + tensor x_159_pad_0 = const()[name = string("x_159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_159_dilations_0 = const()[name = string("x_159_dilations_0"), val = tensor([1, 1])]; + int32 x_159_groups_0 = const()[name = string("x_159_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562995456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568967488))))[name = string("model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_159_cast_fp16 = conv(dilations = x_159_dilations_0, groups = x_159_groups_0, pad = x_159_pad_0, pad_type = x_159_pad_type_0, strides = x_159_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("x_159_cast_fp16")]; + string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; + tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; + tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; + int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569188736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575160768))))[name = string("model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_19_cast_fp16 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("b_19_cast_fp16")]; + string var_7858_mode_0 = const()[name = string("op_7858_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_7858_cast_fp16 = gelu(mode = var_7858_mode_0, x = x_159_cast_fp16)[name = string("op_7858_cast_fp16")]; + tensor input_197_cast_fp16 = mul(x = var_7858_cast_fp16, y = b_19_cast_fp16)[name = string("input_197_cast_fp16")]; + string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; + tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; + tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; + int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575382016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581354048))))[name = string("model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_19_cast_fp16 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_197_cast_fp16)[name = string("e_19_cast_fp16")]; + tensor var_7866_axes_0 = const()[name = string("op_7866_axes_0"), val = tensor([2])]; + tensor var_7866_cast_fp16 = squeeze(axes = var_7866_axes_0, x = e_19_cast_fp16)[name = string("op_7866_cast_fp16")]; + tensor var_7867 = const()[name = string("op_7867"), val = tensor([0, 2, 1])]; + int32 var_7878 = const()[name = string("op_7878"), val = int32(-1)]; + fp16 const_376_promoted_to_fp16 = const()[name = string("const_376_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_161_cast_fp16 = transpose(perm = var_7867, x = var_7866_cast_fp16)[name = string("transpose_113")]; + tensor var_7880_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_376_promoted_to_fp16)[name = string("op_7880_cast_fp16")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199_cast_fp16 = concat(axis = var_7878, interleave = input_199_interleave_0, values = (hidden_states_161_cast_fp16, var_7880_cast_fp16))[name = string("input_199_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_7875_to_fp16 = const()[name = string("op_7875_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_7875_to_fp16, x = input_199_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; + tensor var_7894_to_fp16 = const()[name = string("op_7894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581390976)))]; + tensor hidden_states_163_cast_fp16 = mul(x = normed_239_cast_fp16, y = var_7894_to_fp16)[name = string("hidden_states_163_cast_fp16")]; + tensor hidden_states_165_cast_fp16 = add(x = hidden_states_159_cast_fp16, y = hidden_states_163_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; + int32 var_7945 = const()[name = string("op_7945"), val = int32(-1)]; + fp16 const_380_promoted_to_fp16 = const()[name = string("const_380_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7947_cast_fp16 = mul(x = hidden_states_165_cast_fp16, y = const_380_promoted_to_fp16)[name = string("op_7947_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_7945, interleave = input_201_interleave_0, values = (hidden_states_165_cast_fp16, var_7947_cast_fp16))[name = string("input_201_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_7942_to_fp16 = const()[name = string("op_7942_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_7942_to_fp16, x = input_201_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; + tensor var_7961_to_fp16 = const()[name = string("op_7961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581393344)))]; + tensor hidden_states_167_cast_fp16 = mul(x = normed_243_cast_fp16, y = var_7961_to_fp16)[name = string("hidden_states_167_cast_fp16")]; + tensor var_7966 = const()[name = string("op_7966"), val = tensor([0, 2, 1])]; + tensor var_7969_axes_0 = const()[name = string("op_7969_axes_0"), val = tensor([2])]; + tensor var_7967_cast_fp16 = transpose(perm = var_7966, x = hidden_states_167_cast_fp16)[name = string("transpose_112")]; + tensor var_7969_cast_fp16 = expand_dims(axes = var_7969_axes_0, x = var_7967_cast_fp16)[name = string("op_7969_cast_fp16")]; + string var_7985_pad_type_0 = const()[name = string("op_7985_pad_type_0"), val = string("valid")]; + tensor var_7985_strides_0 = const()[name = string("op_7985_strides_0"), val = tensor([1, 1])]; + tensor var_7985_pad_0 = const()[name = string("op_7985_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7985_dilations_0 = const()[name = string("op_7985_dilations_0"), val = tensor([1, 1])]; + int32 var_7985_groups_0 = const()[name = string("op_7985_groups_0"), val = int32(1)]; + tensor var_7985 = conv(dilations = var_7985_dilations_0, groups = var_7985_groups_0, pad = var_7985_pad_0, pad_type = var_7985_pad_type_0, strides = var_7985_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_7969_cast_fp16)[name = string("op_7985")]; + tensor var_7990 = const()[name = string("op_7990"), val = tensor([1, 4, 1, 256])]; + tensor var_7991 = reshape(shape = var_7990, x = var_7985)[name = string("op_7991")]; + string var_8007_pad_type_0 = const()[name = string("op_8007_pad_type_0"), val = string("valid")]; + tensor var_8007_strides_0 = const()[name = string("op_8007_strides_0"), val = tensor([1, 1])]; + tensor var_8007_pad_0 = const()[name = string("op_8007_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8007_dilations_0 = const()[name = string("op_8007_dilations_0"), val = tensor([1, 1])]; + int32 var_8007_groups_0 = const()[name = string("op_8007_groups_0"), val = int32(1)]; + tensor var_8007 = conv(dilations = var_8007_dilations_0, groups = var_8007_groups_0, pad = var_8007_pad_0, pad_type = var_8007_pad_type_0, strides = var_8007_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_7969_cast_fp16)[name = string("op_8007")]; + tensor var_8012 = const()[name = string("op_8012"), val = tensor([1, 1, 1, 256])]; + tensor var_8013 = reshape(shape = var_8012, x = var_8007)[name = string("op_8013")]; + string var_8029_pad_type_0 = const()[name = string("op_8029_pad_type_0"), val = string("valid")]; + tensor var_8029_strides_0 = const()[name = string("op_8029_strides_0"), val = tensor([1, 1])]; + tensor var_8029_pad_0 = const()[name = string("op_8029_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8029_dilations_0 = const()[name = string("op_8029_dilations_0"), val = tensor([1, 1])]; + int32 var_8029_groups_0 = const()[name = string("op_8029_groups_0"), val = int32(1)]; + tensor var_8029 = conv(dilations = var_8029_dilations_0, groups = var_8029_groups_0, pad = var_8029_pad_0, pad_type = var_8029_pad_type_0, strides = var_8029_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_7969_cast_fp16)[name = string("op_8029")]; + tensor var_8034 = const()[name = string("op_8034"), val = tensor([1, 1, 1, 256])]; + tensor var_8035 = reshape(shape = var_8034, x = var_8029)[name = string("op_8035")]; + int32 var_8050 = const()[name = string("op_8050"), val = int32(-1)]; + fp16 const_384_promoted = const()[name = string("const_384_promoted"), val = fp16(-0x1p+0)]; + tensor var_8052 = mul(x = var_7991, y = const_384_promoted)[name = string("op_8052")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205 = concat(axis = var_8050, interleave = input_205_interleave_0, values = (var_7991, var_8052))[name = string("input_205")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_8047_to_fp16 = const()[name = string("op_8047_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_8047_to_fp16, x = input_205)[name = string("normed_245_cast_fp16")]; + tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; + tensor var_8066_to_fp16 = const()[name = string("op_8066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581395712)))]; + tensor q_21_cast_fp16 = mul(x = normed_247, y = var_8066_to_fp16)[name = string("q_21_cast_fp16")]; + int32 var_8077 = const()[name = string("op_8077"), val = int32(-1)]; + fp16 const_388_promoted = const()[name = string("const_388_promoted"), val = fp16(-0x1p+0)]; + tensor var_8079 = mul(x = var_8013, y = const_388_promoted)[name = string("op_8079")]; + bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; + tensor input_207 = concat(axis = var_8077, interleave = input_207_interleave_0, values = (var_8013, var_8079))[name = string("input_207")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_8074_to_fp16 = const()[name = string("op_8074_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_8074_to_fp16, x = input_207)[name = string("normed_249_cast_fp16")]; + tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; + tensor var_8093_to_fp16 = const()[name = string("op_8093_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581396288)))]; + tensor k_21_cast_fp16 = mul(x = normed_251, y = var_8093_to_fp16)[name = string("k_21_cast_fp16")]; + tensor var_8095_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8095_cast_fp16")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; + fp16 const_394_promoted_to_fp16 = const()[name = string("const_394_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8116_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_394_promoted_to_fp16)[name = string("op_8116_cast_fp16")]; + int32 var_8118 = const()[name = string("op_8118"), val = int32(-1)]; + bool var_8119_interleave_0 = const()[name = string("op_8119_interleave_0"), val = bool(false)]; + tensor var_8119_cast_fp16 = concat(axis = var_8118, interleave = var_8119_interleave_0, values = (var_8116_cast_fp16, x1_41_cast_fp16))[name = string("op_8119_cast_fp16")]; + tensor var_8120_cast_fp16 = mul(x = var_8119_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8120_cast_fp16")]; + tensor query_states_41_cast_fp16 = add(x = var_8095_cast_fp16, y = var_8120_cast_fp16)[name = string("query_states_41_cast_fp16")]; + tensor var_8123_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8123_cast_fp16")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; + fp16 const_397_promoted_to_fp16 = const()[name = string("const_397_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8144_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_397_promoted_to_fp16)[name = string("op_8144_cast_fp16")]; + int32 var_8146 = const()[name = string("op_8146"), val = int32(-1)]; + bool var_8147_interleave_0 = const()[name = string("op_8147_interleave_0"), val = bool(false)]; + tensor var_8147_cast_fp16 = concat(axis = var_8146, interleave = var_8147_interleave_0, values = (var_8144_cast_fp16, x1_43_cast_fp16))[name = string("op_8147_cast_fp16")]; + tensor var_8148_cast_fp16 = mul(x = var_8147_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8148_cast_fp16")]; + tensor key_states_41_cast_fp16 = add(x = var_8123_cast_fp16, y = var_8148_cast_fp16)[name = string("key_states_41_cast_fp16")]; + tensor key_slice_19_begin_0 = const()[name = string("key_slice_19_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor key_slice_19_end_0 = const()[name = string("key_slice_19_end_0"), val = tensor([10, 1, 512, 256])]; + tensor key_slice_19_end_mask_0 = const()[name = string("key_slice_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_19_cast_fp16 = slice_by_index(begin = key_slice_19_begin_0, end = key_slice_19_end_0, end_mask = key_slice_19_end_mask_0, x = coreml_update_state_71)[name = string("key_slice_19_cast_fp16")]; + tensor key_tail_19_begin_0 = const()[name = string("key_tail_19_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_19_end_0 = const()[name = string("key_tail_19_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_19_cast_fp16 = slice_by_index(begin = key_tail_19_begin_0, end = key_tail_19_end_0, x = key_slice_19_cast_fp16)[name = string("key_tail_19_cast_fp16")]; + int32 var_8161 = const()[name = string("op_8161"), val = int32(2)]; + bool shifted_key_19_interleave_0 = const()[name = string("shifted_key_19_interleave_0"), val = bool(false)]; + tensor shifted_key_19_cast_fp16 = concat(axis = var_8161, interleave = shifted_key_19_interleave_0, values = (key_tail_19_cast_fp16, key_states_41_cast_fp16))[name = string("shifted_key_19_cast_fp16")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([9, 0, 0, 0])]; + tensor concat_45 = const()[name = string("concat_45"), val = tensor([10, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_44, begin_mask = model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0, end = concat_45, end_mask = model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_19_stride_0, update = shifted_key_19_cast_fp16, x = coreml_update_state_71)[name = string("model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_72")]; + tensor value_slice_19_begin_0 = const()[name = string("value_slice_19_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor value_slice_19_end_0 = const()[name = string("value_slice_19_end_0"), val = tensor([32, 1, 512, 256])]; + tensor value_slice_19_end_mask_0 = const()[name = string("value_slice_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_19_cast_fp16 = slice_by_index(begin = value_slice_19_begin_0, end = value_slice_19_end_0, end_mask = value_slice_19_end_mask_0, x = coreml_update_state_72)[name = string("value_slice_19_cast_fp16")]; + tensor value_tail_19_begin_0 = const()[name = string("value_tail_19_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_19_end_0 = const()[name = string("value_tail_19_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_19_cast_fp16 = slice_by_index(begin = value_tail_19_begin_0, end = value_tail_19_end_0, x = value_slice_19_cast_fp16)[name = string("value_tail_19_cast_fp16")]; + int32 var_8195 = const()[name = string("op_8195"), val = int32(2)]; + bool shifted_value_19_interleave_0 = const()[name = string("shifted_value_19_interleave_0"), val = bool(false)]; + tensor shifted_value_19_cast_fp16 = concat(axis = var_8195, interleave = shifted_value_19_interleave_0, values = (value_tail_19_cast_fp16, var_8035))[name = string("shifted_value_19_cast_fp16")]; + tensor concat_46 = const()[name = string("concat_46"), val = tensor([31, 0, 0, 0])]; + tensor concat_47 = const()[name = string("concat_47"), val = tensor([32, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_20_stride_0, update = shifted_value_19_cast_fp16, x = coreml_update_state_72)[name = string("model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_73")]; + tensor var_8223_begin_0 = const()[name = string("op_8223_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_8223_end_0 = const()[name = string("op_8223_end_0"), val = tensor([10, 1, 512, 256])]; + tensor var_8223_end_mask_0 = const()[name = string("op_8223_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8223_cast_fp16 = slice_by_index(begin = var_8223_begin_0, end = var_8223_end_0, end_mask = var_8223_end_mask_0, x = coreml_update_state_73)[name = string("op_8223_cast_fp16")]; + tensor var_8230_begin_0 = const()[name = string("op_8230_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_8230_end_0 = const()[name = string("op_8230_end_0"), val = tensor([32, 1, 512, 256])]; + tensor var_8230_end_mask_0 = const()[name = string("op_8230_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8230_cast_fp16 = slice_by_index(begin = var_8230_begin_0, end = var_8230_end_0, end_mask = var_8230_end_mask_0, x = coreml_update_state_73)[name = string("op_8230_cast_fp16")]; + tensor var_8267 = const()[name = string("op_8267"), val = tensor([1, 4, 1, 1])]; + tensor x_165_cast_fp16 = tile(reps = var_8267, x = var_8223_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor var_8287 = const()[name = string("op_8287"), val = tensor([1, 4, 1, 1])]; + tensor x_171_cast_fp16 = tile(reps = var_8287, x = var_8230_cast_fp16)[name = string("x_171_cast_fp16")]; + bool var_8314_transpose_x_1 = const()[name = string("op_8314_transpose_x_1"), val = bool(false)]; + bool var_8314_transpose_y_1 = const()[name = string("op_8314_transpose_y_1"), val = bool(true)]; + tensor var_8314 = matmul(transpose_x = var_8314_transpose_x_1, transpose_y = var_8314_transpose_y_1, x = query_states_41_cast_fp16, y = x_165_cast_fp16)[name = string("op_8314")]; + fp16 var_8315_to_fp16 = const()[name = string("op_8315_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_61_cast_fp16 = mul(x = var_8314, y = var_8315_to_fp16)[name = string("attn_weights_61_cast_fp16")]; + tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = var_2105)[name = string("attn_weights_63_cast_fp16")]; + int32 var_8350 = const()[name = string("op_8350"), val = int32(-1)]; + tensor attn_weights_65_cast_fp16 = softmax(axis = var_8350, x = attn_weights_63_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; + bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; + bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; + tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = attn_weights_65_cast_fp16, y = x_171_cast_fp16)[name = string("attn_output_101_cast_fp16")]; + tensor var_8361_perm_0 = const()[name = string("op_8361_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8365 = const()[name = string("op_8365"), val = tensor([1, 1, 1024])]; + tensor var_8361_cast_fp16 = transpose(perm = var_8361_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_111")]; + tensor attn_output_105_cast_fp16 = reshape(shape = var_8365, x = var_8361_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_8370 = const()[name = string("op_8370"), val = tensor([0, 2, 1])]; + string var_8386_pad_type_0 = const()[name = string("op_8386_pad_type_0"), val = string("valid")]; + int32 var_8386_groups_0 = const()[name = string("op_8386_groups_0"), val = int32(1)]; + tensor var_8386_strides_0 = const()[name = string("op_8386_strides_0"), val = tensor([1])]; + tensor var_8386_pad_0 = const()[name = string("op_8386_pad_0"), val = tensor([0, 0])]; + tensor var_8386_dilations_0 = const()[name = string("op_8386_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581396864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582281664))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8371_cast_fp16 = transpose(perm = var_8370, x = attn_output_105_cast_fp16)[name = string("transpose_110")]; + tensor var_8386_cast_fp16 = conv(dilations = var_8386_dilations_0, groups = var_8386_groups_0, pad = var_8386_pad_0, pad_type = var_8386_pad_type_0, strides = var_8386_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_8371_cast_fp16)[name = string("op_8386_cast_fp16")]; + tensor var_8390 = const()[name = string("op_8390"), val = tensor([0, 2, 1])]; + int32 var_8401 = const()[name = string("op_8401"), val = int32(-1)]; + fp16 const_406_promoted_to_fp16 = const()[name = string("const_406_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_173_cast_fp16 = transpose(perm = var_8390, x = var_8386_cast_fp16)[name = string("transpose_109")]; + tensor var_8403_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = const_406_promoted_to_fp16)[name = string("op_8403_cast_fp16")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211_cast_fp16 = concat(axis = var_8401, interleave = input_211_interleave_0, values = (hidden_states_173_cast_fp16, var_8403_cast_fp16))[name = string("input_211_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_8398_to_fp16 = const()[name = string("op_8398_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_8398_to_fp16, x = input_211_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; + tensor var_8417_to_fp16 = const()[name = string("op_8417_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582318592)))]; + tensor attn_output_109_cast_fp16 = mul(x = normed_255_cast_fp16, y = var_8417_to_fp16)[name = string("attn_output_109_cast_fp16")]; + tensor hidden_states_175_cast_fp16 = add(x = hidden_states_165_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_175_cast_fp16")]; + int32 var_8430 = const()[name = string("op_8430"), val = int32(-1)]; + fp16 const_410_promoted_to_fp16 = const()[name = string("const_410_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8432_cast_fp16 = mul(x = hidden_states_175_cast_fp16, y = const_410_promoted_to_fp16)[name = string("op_8432_cast_fp16")]; + bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; + tensor input_213_cast_fp16 = concat(axis = var_8430, interleave = input_213_interleave_0, values = (hidden_states_175_cast_fp16, var_8432_cast_fp16))[name = string("input_213_cast_fp16")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_8427_to_fp16 = const()[name = string("op_8427_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_8427_to_fp16, x = input_213_cast_fp16)[name = string("normed_257_cast_fp16")]; + tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; + tensor var_8446_to_fp16 = const()[name = string("op_8446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582320960)))]; + tensor x_173_cast_fp16 = mul(x = normed_259_cast_fp16, y = var_8446_to_fp16)[name = string("x_173_cast_fp16")]; + tensor var_8458 = const()[name = string("op_8458"), val = tensor([0, 2, 1])]; + tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; + tensor var_8459_cast_fp16 = transpose(perm = var_8458, x = x_173_cast_fp16)[name = string("transpose_108")]; + tensor input_215_cast_fp16 = expand_dims(axes = input_215_axes_0, x = var_8459_cast_fp16)[name = string("input_215_cast_fp16")]; + string x_175_pad_type_0 = const()[name = string("x_175_pad_type_0"), val = string("valid")]; + tensor x_175_strides_0 = const()[name = string("x_175_strides_0"), val = tensor([1, 1])]; + tensor x_175_pad_0 = const()[name = string("x_175_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_175_dilations_0 = const()[name = string("x_175_dilations_0"), val = tensor([1, 1])]; + int32 x_175_groups_0 = const()[name = string("x_175_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582323328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588295360))))[name = string("model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("x_175_cast_fp16")]; + string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; + tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; + tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; + int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588516608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594488640))))[name = string("model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_21_cast_fp16 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("b_21_cast_fp16")]; + string var_8484_mode_0 = const()[name = string("op_8484_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_8484_cast_fp16 = gelu(mode = var_8484_mode_0, x = x_175_cast_fp16)[name = string("op_8484_cast_fp16")]; + tensor input_217_cast_fp16 = mul(x = var_8484_cast_fp16, y = b_21_cast_fp16)[name = string("input_217_cast_fp16")]; + string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; + tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; + tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; + int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594709888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600681920))))[name = string("model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_21_cast_fp16 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_217_cast_fp16)[name = string("e_21_cast_fp16")]; + tensor var_8492_axes_0 = const()[name = string("op_8492_axes_0"), val = tensor([2])]; + tensor var_8492_cast_fp16 = squeeze(axes = var_8492_axes_0, x = e_21_cast_fp16)[name = string("op_8492_cast_fp16")]; + tensor var_8493 = const()[name = string("op_8493"), val = tensor([0, 2, 1])]; + int32 var_8504 = const()[name = string("op_8504"), val = int32(-1)]; + fp16 const_414_promoted_to_fp16 = const()[name = string("const_414_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_177_cast_fp16 = transpose(perm = var_8493, x = var_8492_cast_fp16)[name = string("transpose_107")]; + tensor var_8506_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = const_414_promoted_to_fp16)[name = string("op_8506_cast_fp16")]; + bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; + tensor input_219_cast_fp16 = concat(axis = var_8504, interleave = input_219_interleave_0, values = (hidden_states_177_cast_fp16, var_8506_cast_fp16))[name = string("input_219_cast_fp16")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_8501_to_fp16 = const()[name = string("op_8501_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_8501_to_fp16, x = input_219_cast_fp16)[name = string("normed_261_cast_fp16")]; + tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_263_cast_fp16 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263_cast_fp16")]; + tensor var_8520_to_fp16 = const()[name = string("op_8520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600718848)))]; + tensor hidden_states_179_cast_fp16 = mul(x = normed_263_cast_fp16, y = var_8520_to_fp16)[name = string("hidden_states_179_cast_fp16")]; + tensor hidden_states_181_cast_fp16 = add(x = hidden_states_175_cast_fp16, y = hidden_states_179_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; + int32 var_8571 = const()[name = string("op_8571"), val = int32(-1)]; + fp16 const_418_promoted_to_fp16 = const()[name = string("const_418_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8573_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = const_418_promoted_to_fp16)[name = string("op_8573_cast_fp16")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221_cast_fp16 = concat(axis = var_8571, interleave = input_221_interleave_0, values = (hidden_states_181_cast_fp16, var_8573_cast_fp16))[name = string("input_221_cast_fp16")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_8568_to_fp16 = const()[name = string("op_8568_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_8568_to_fp16, x = input_221_cast_fp16)[name = string("normed_265_cast_fp16")]; + tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_267_cast_fp16 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267_cast_fp16")]; + tensor var_8587_to_fp16 = const()[name = string("op_8587_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600721216)))]; + tensor hidden_states_183_cast_fp16 = mul(x = normed_267_cast_fp16, y = var_8587_to_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor var_8592 = const()[name = string("op_8592"), val = tensor([0, 2, 1])]; + tensor var_8595_axes_0 = const()[name = string("op_8595_axes_0"), val = tensor([2])]; + tensor var_8593_cast_fp16 = transpose(perm = var_8592, x = hidden_states_183_cast_fp16)[name = string("transpose_106")]; + tensor var_8595_cast_fp16 = expand_dims(axes = var_8595_axes_0, x = var_8593_cast_fp16)[name = string("op_8595_cast_fp16")]; + string var_8611_pad_type_0 = const()[name = string("op_8611_pad_type_0"), val = string("valid")]; + tensor var_8611_strides_0 = const()[name = string("op_8611_strides_0"), val = tensor([1, 1])]; + tensor var_8611_pad_0 = const()[name = string("op_8611_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8611_dilations_0 = const()[name = string("op_8611_dilations_0"), val = tensor([1, 1])]; + int32 var_8611_groups_0 = const()[name = string("op_8611_groups_0"), val = int32(1)]; + tensor var_8611 = conv(dilations = var_8611_dilations_0, groups = var_8611_groups_0, pad = var_8611_pad_0, pad_type = var_8611_pad_type_0, strides = var_8611_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_8595_cast_fp16)[name = string("op_8611")]; + tensor var_8616 = const()[name = string("op_8616"), val = tensor([1, 4, 1, 256])]; + tensor var_8617 = reshape(shape = var_8616, x = var_8611)[name = string("op_8617")]; + string var_8633_pad_type_0 = const()[name = string("op_8633_pad_type_0"), val = string("valid")]; + tensor var_8633_strides_0 = const()[name = string("op_8633_strides_0"), val = tensor([1, 1])]; + tensor var_8633_pad_0 = const()[name = string("op_8633_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8633_dilations_0 = const()[name = string("op_8633_dilations_0"), val = tensor([1, 1])]; + int32 var_8633_groups_0 = const()[name = string("op_8633_groups_0"), val = int32(1)]; + tensor var_8633 = conv(dilations = var_8633_dilations_0, groups = var_8633_groups_0, pad = var_8633_pad_0, pad_type = var_8633_pad_type_0, strides = var_8633_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_8595_cast_fp16)[name = string("op_8633")]; + tensor var_8638 = const()[name = string("op_8638"), val = tensor([1, 1, 1, 256])]; + tensor var_8639 = reshape(shape = var_8638, x = var_8633)[name = string("op_8639")]; + string var_8655_pad_type_0 = const()[name = string("op_8655_pad_type_0"), val = string("valid")]; + tensor var_8655_strides_0 = const()[name = string("op_8655_strides_0"), val = tensor([1, 1])]; + tensor var_8655_pad_0 = const()[name = string("op_8655_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8655_dilations_0 = const()[name = string("op_8655_dilations_0"), val = tensor([1, 1])]; + int32 var_8655_groups_0 = const()[name = string("op_8655_groups_0"), val = int32(1)]; + tensor var_8655 = conv(dilations = var_8655_dilations_0, groups = var_8655_groups_0, pad = var_8655_pad_0, pad_type = var_8655_pad_type_0, strides = var_8655_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_8595_cast_fp16)[name = string("op_8655")]; + tensor var_8660 = const()[name = string("op_8660"), val = tensor([1, 1, 1, 256])]; + tensor var_8661 = reshape(shape = var_8660, x = var_8655)[name = string("op_8661")]; + int32 var_8676 = const()[name = string("op_8676"), val = int32(-1)]; + fp16 const_422_promoted = const()[name = string("const_422_promoted"), val = fp16(-0x1p+0)]; + tensor var_8678 = mul(x = var_8617, y = const_422_promoted)[name = string("op_8678")]; + bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; + tensor input_225 = concat(axis = var_8676, interleave = input_225_interleave_0, values = (var_8617, var_8678))[name = string("input_225")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_8673_to_fp16 = const()[name = string("op_8673_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_8673_to_fp16, x = input_225)[name = string("normed_269_cast_fp16")]; + tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_271 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271")]; + tensor var_8692_to_fp16 = const()[name = string("op_8692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600723584)))]; + tensor q_23_cast_fp16 = mul(x = normed_271, y = var_8692_to_fp16)[name = string("q_23_cast_fp16")]; + int32 var_8703 = const()[name = string("op_8703"), val = int32(-1)]; + fp16 const_426_promoted = const()[name = string("const_426_promoted"), val = fp16(-0x1p+0)]; + tensor var_8705 = mul(x = var_8639, y = const_426_promoted)[name = string("op_8705")]; + bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; + tensor input_227 = concat(axis = var_8703, interleave = input_227_interleave_0, values = (var_8639, var_8705))[name = string("input_227")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_8700_to_fp16 = const()[name = string("op_8700_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_8700_to_fp16, x = input_227)[name = string("normed_273_cast_fp16")]; + tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_275 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275")]; + tensor var_8719_to_fp16 = const()[name = string("op_8719_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600724160)))]; + tensor k_23_cast_fp16 = mul(x = normed_275, y = var_8719_to_fp16)[name = string("k_23_cast_fp16")]; + tensor var_8721_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_21_cast_fp16)[name = string("op_8721_cast_fp16")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; + fp16 const_432_promoted_to_fp16 = const()[name = string("const_432_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8742_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_432_promoted_to_fp16)[name = string("op_8742_cast_fp16")]; + int32 var_8744 = const()[name = string("op_8744"), val = int32(-1)]; + bool var_8745_interleave_0 = const()[name = string("op_8745_interleave_0"), val = bool(false)]; + tensor var_8745_cast_fp16 = concat(axis = var_8744, interleave = var_8745_interleave_0, values = (var_8742_cast_fp16, x1_45_cast_fp16))[name = string("op_8745_cast_fp16")]; + tensor var_8746_cast_fp16 = mul(x = var_8745_cast_fp16, y = sin_21_cast_fp16)[name = string("op_8746_cast_fp16")]; + tensor query_states_45_cast_fp16 = add(x = var_8721_cast_fp16, y = var_8746_cast_fp16)[name = string("query_states_45_cast_fp16")]; + tensor var_8749_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_21_cast_fp16)[name = string("op_8749_cast_fp16")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; + fp16 const_435_promoted_to_fp16 = const()[name = string("const_435_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8770_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_435_promoted_to_fp16)[name = string("op_8770_cast_fp16")]; + int32 var_8772 = const()[name = string("op_8772"), val = int32(-1)]; + bool var_8773_interleave_0 = const()[name = string("op_8773_interleave_0"), val = bool(false)]; + tensor var_8773_cast_fp16 = concat(axis = var_8772, interleave = var_8773_interleave_0, values = (var_8770_cast_fp16, x1_47_cast_fp16))[name = string("op_8773_cast_fp16")]; + tensor var_8774_cast_fp16 = mul(x = var_8773_cast_fp16, y = sin_21_cast_fp16)[name = string("op_8774_cast_fp16")]; + tensor key_states_45_cast_fp16 = add(x = var_8749_cast_fp16, y = var_8774_cast_fp16)[name = string("key_states_45_cast_fp16")]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([1])]; + tensor expand_dims_113 = const()[name = string("expand_dims_113"), val = tensor([0])]; + tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; + tensor expand_dims_116 = const()[name = string("expand_dims_116"), val = tensor([2])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_112, expand_dims_113, current_pos, expand_dims_115))[name = string("concat_50")]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_116, concat_51_values1_0, var_5043, concat_51_values3_0))[name = string("concat_51")]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_3_stride_0, update = key_states_45_cast_fp16, x = coreml_update_state_63)[name = string("model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_74")]; + tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([5])]; + tensor expand_dims_119 = const()[name = string("expand_dims_119"), val = tensor([0])]; + tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; + tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([6])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_118, expand_dims_119, current_pos, expand_dims_121))[name = string("concat_54")]; + tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; + tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_122, concat_55_values1_0, var_5043, concat_55_values3_0))[name = string("concat_55")]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_4_stride_0, update = var_8661, x = coreml_update_state_74)[name = string("model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_75")]; + tensor var_8829_begin_0 = const()[name = string("op_8829_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_8829_end_0 = const()[name = string("op_8829_end_0"), val = tensor([2, 1, 4096, 256])]; + tensor var_8829_end_mask_0 = const()[name = string("op_8829_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8829_cast_fp16 = slice_by_index(begin = var_8829_begin_0, end = var_8829_end_0, end_mask = var_8829_end_mask_0, x = coreml_update_state_75)[name = string("op_8829_cast_fp16")]; + tensor var_8836_begin_0 = const()[name = string("op_8836_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_8836_end_0 = const()[name = string("op_8836_end_0"), val = tensor([6, 1, 4096, 256])]; + tensor var_8836_end_mask_0 = const()[name = string("op_8836_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8836_cast_fp16 = slice_by_index(begin = var_8836_begin_0, end = var_8836_end_0, end_mask = var_8836_end_mask_0, x = coreml_update_state_75)[name = string("op_8836_cast_fp16")]; + tensor var_8873 = const()[name = string("op_8873"), val = tensor([1, 4, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_8873, x = var_8829_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_8893 = const()[name = string("op_8893"), val = tensor([1, 4, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_8893, x = var_8836_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_8920_transpose_x_1 = const()[name = string("op_8920_transpose_x_1"), val = bool(false)]; + bool var_8920_transpose_y_1 = const()[name = string("op_8920_transpose_y_1"), val = bool(true)]; + tensor var_8920 = matmul(transpose_x = var_8920_transpose_x_1, transpose_y = var_8920_transpose_y_1, x = query_states_45_cast_fp16, y = x_181_cast_fp16)[name = string("op_8920")]; + fp16 var_8921_to_fp16 = const()[name = string("op_8921_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_67_cast_fp16 = mul(x = var_8920, y = var_8921_to_fp16)[name = string("attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; + int32 var_8956 = const()[name = string("op_8956"), val = int32(-1)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_8956, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool attn_output_111_transpose_x_0 = const()[name = string("attn_output_111_transpose_x_0"), val = bool(false)]; + bool attn_output_111_transpose_y_0 = const()[name = string("attn_output_111_transpose_y_0"), val = bool(false)]; + tensor attn_output_111_cast_fp16 = matmul(transpose_x = attn_output_111_transpose_x_0, transpose_y = attn_output_111_transpose_y_0, x = attn_weights_71_cast_fp16, y = x_187_cast_fp16)[name = string("attn_output_111_cast_fp16")]; + tensor var_8967_perm_0 = const()[name = string("op_8967_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8971 = const()[name = string("op_8971"), val = tensor([1, 1, 1024])]; + tensor var_8967_cast_fp16 = transpose(perm = var_8967_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_105")]; + tensor attn_output_115_cast_fp16 = reshape(shape = var_8971, x = var_8967_cast_fp16)[name = string("attn_output_115_cast_fp16")]; + tensor var_8976 = const()[name = string("op_8976"), val = tensor([0, 2, 1])]; + string var_8992_pad_type_0 = const()[name = string("op_8992_pad_type_0"), val = string("valid")]; + int32 var_8992_groups_0 = const()[name = string("op_8992_groups_0"), val = int32(1)]; + tensor var_8992_strides_0 = const()[name = string("op_8992_strides_0"), val = tensor([1])]; + tensor var_8992_pad_0 = const()[name = string("op_8992_pad_0"), val = tensor([0, 0])]; + tensor var_8992_dilations_0 = const()[name = string("op_8992_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600724736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601609536))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8977_cast_fp16 = transpose(perm = var_8976, x = attn_output_115_cast_fp16)[name = string("transpose_104")]; + tensor var_8992_cast_fp16 = conv(dilations = var_8992_dilations_0, groups = var_8992_groups_0, pad = var_8992_pad_0, pad_type = var_8992_pad_type_0, strides = var_8992_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_8977_cast_fp16)[name = string("op_8992_cast_fp16")]; + tensor var_8996 = const()[name = string("op_8996"), val = tensor([0, 2, 1])]; + int32 var_9007 = const()[name = string("op_9007"), val = int32(-1)]; + fp16 const_444_promoted_to_fp16 = const()[name = string("const_444_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_189_cast_fp16 = transpose(perm = var_8996, x = var_8992_cast_fp16)[name = string("transpose_103")]; + tensor var_9009_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_444_promoted_to_fp16)[name = string("op_9009_cast_fp16")]; + bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; + tensor input_231_cast_fp16 = concat(axis = var_9007, interleave = input_231_interleave_0, values = (hidden_states_189_cast_fp16, var_9009_cast_fp16))[name = string("input_231_cast_fp16")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_9004_to_fp16 = const()[name = string("op_9004_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_9004_to_fp16, x = input_231_cast_fp16)[name = string("normed_277_cast_fp16")]; + tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_279_cast_fp16 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279_cast_fp16")]; + tensor var_9023_to_fp16 = const()[name = string("op_9023_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601646464)))]; + tensor attn_output_119_cast_fp16 = mul(x = normed_279_cast_fp16, y = var_9023_to_fp16)[name = string("attn_output_119_cast_fp16")]; + tensor hidden_states_191_cast_fp16 = add(x = hidden_states_181_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + int32 var_9036 = const()[name = string("op_9036"), val = int32(-1)]; + fp16 const_448_promoted_to_fp16 = const()[name = string("const_448_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9038_cast_fp16 = mul(x = hidden_states_191_cast_fp16, y = const_448_promoted_to_fp16)[name = string("op_9038_cast_fp16")]; + bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; + tensor input_233_cast_fp16 = concat(axis = var_9036, interleave = input_233_interleave_0, values = (hidden_states_191_cast_fp16, var_9038_cast_fp16))[name = string("input_233_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_9033_to_fp16 = const()[name = string("op_9033_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_9033_to_fp16, x = input_233_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_283_cast_fp16 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283_cast_fp16")]; + tensor var_9052_to_fp16 = const()[name = string("op_9052_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601648832)))]; + tensor x_189_cast_fp16 = mul(x = normed_283_cast_fp16, y = var_9052_to_fp16)[name = string("x_189_cast_fp16")]; + tensor var_9064 = const()[name = string("op_9064"), val = tensor([0, 2, 1])]; + tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; + tensor var_9065_cast_fp16 = transpose(perm = var_9064, x = x_189_cast_fp16)[name = string("transpose_102")]; + tensor input_235_cast_fp16 = expand_dims(axes = input_235_axes_0, x = var_9065_cast_fp16)[name = string("input_235_cast_fp16")]; + string x_191_pad_type_0 = const()[name = string("x_191_pad_type_0"), val = string("valid")]; + tensor x_191_strides_0 = const()[name = string("x_191_strides_0"), val = tensor([1, 1])]; + tensor x_191_pad_0 = const()[name = string("x_191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_191_dilations_0 = const()[name = string("x_191_dilations_0"), val = tensor([1, 1])]; + int32 x_191_groups_0 = const()[name = string("x_191_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601651200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607623232))))[name = string("model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("x_191_cast_fp16")]; + string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; + tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; + tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; + int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607844480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613816512))))[name = string("model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_23_cast_fp16 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("b_23_cast_fp16")]; + string var_9090_mode_0 = const()[name = string("op_9090_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_9090_cast_fp16 = gelu(mode = var_9090_mode_0, x = x_191_cast_fp16)[name = string("op_9090_cast_fp16")]; + tensor input_237_cast_fp16 = mul(x = var_9090_cast_fp16, y = b_23_cast_fp16)[name = string("input_237_cast_fp16")]; + string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; + tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; + tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; + int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614037760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620009792))))[name = string("model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_23_cast_fp16 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("e_23_cast_fp16")]; + tensor var_9098_axes_0 = const()[name = string("op_9098_axes_0"), val = tensor([2])]; + tensor var_9098_cast_fp16 = squeeze(axes = var_9098_axes_0, x = e_23_cast_fp16)[name = string("op_9098_cast_fp16")]; + tensor var_9099 = const()[name = string("op_9099"), val = tensor([0, 2, 1])]; + int32 var_9110 = const()[name = string("op_9110"), val = int32(-1)]; + fp16 const_452_promoted_to_fp16 = const()[name = string("const_452_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_193_cast_fp16 = transpose(perm = var_9099, x = var_9098_cast_fp16)[name = string("transpose_101")]; + tensor var_9112_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = const_452_promoted_to_fp16)[name = string("op_9112_cast_fp16")]; + bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; + tensor input_239_cast_fp16 = concat(axis = var_9110, interleave = input_239_interleave_0, values = (hidden_states_193_cast_fp16, var_9112_cast_fp16))[name = string("input_239_cast_fp16")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_9107_to_fp16 = const()[name = string("op_9107_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_9107_to_fp16, x = input_239_cast_fp16)[name = string("normed_285_cast_fp16")]; + tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; + tensor var_9126_to_fp16 = const()[name = string("op_9126_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620046720)))]; + tensor hidden_states_195_cast_fp16 = mul(x = normed_287_cast_fp16, y = var_9126_to_fp16)[name = string("hidden_states_195_cast_fp16")]; + tensor hidden_states_197_cast_fp16 = add(x = hidden_states_191_cast_fp16, y = hidden_states_195_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; + int32 var_9177 = const()[name = string("op_9177"), val = int32(-1)]; + fp16 const_456_promoted_to_fp16 = const()[name = string("const_456_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9179_cast_fp16 = mul(x = hidden_states_197_cast_fp16, y = const_456_promoted_to_fp16)[name = string("op_9179_cast_fp16")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241_cast_fp16 = concat(axis = var_9177, interleave = input_241_interleave_0, values = (hidden_states_197_cast_fp16, var_9179_cast_fp16))[name = string("input_241_cast_fp16")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_9174_to_fp16 = const()[name = string("op_9174_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_9174_to_fp16, x = input_241_cast_fp16)[name = string("normed_289_cast_fp16")]; + tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; + tensor var_9193_to_fp16 = const()[name = string("op_9193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620049088)))]; + tensor hidden_states_199_cast_fp16 = mul(x = normed_291_cast_fp16, y = var_9193_to_fp16)[name = string("hidden_states_199_cast_fp16")]; + tensor var_9198 = const()[name = string("op_9198"), val = tensor([0, 2, 1])]; + tensor var_9201_axes_0 = const()[name = string("op_9201_axes_0"), val = tensor([2])]; + tensor var_9199_cast_fp16 = transpose(perm = var_9198, x = hidden_states_199_cast_fp16)[name = string("transpose_100")]; + tensor var_9201_cast_fp16 = expand_dims(axes = var_9201_axes_0, x = var_9199_cast_fp16)[name = string("op_9201_cast_fp16")]; + string var_9217_pad_type_0 = const()[name = string("op_9217_pad_type_0"), val = string("valid")]; + tensor var_9217_strides_0 = const()[name = string("op_9217_strides_0"), val = tensor([1, 1])]; + tensor var_9217_pad_0 = const()[name = string("op_9217_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9217_dilations_0 = const()[name = string("op_9217_dilations_0"), val = tensor([1, 1])]; + int32 var_9217_groups_0 = const()[name = string("op_9217_groups_0"), val = int32(1)]; + tensor var_9217 = conv(dilations = var_9217_dilations_0, groups = var_9217_groups_0, pad = var_9217_pad_0, pad_type = var_9217_pad_type_0, strides = var_9217_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_9201_cast_fp16)[name = string("op_9217")]; + tensor var_9222 = const()[name = string("op_9222"), val = tensor([1, 4, 1, 256])]; + tensor var_9223 = reshape(shape = var_9222, x = var_9217)[name = string("op_9223")]; + string var_9239_pad_type_0 = const()[name = string("op_9239_pad_type_0"), val = string("valid")]; + tensor var_9239_strides_0 = const()[name = string("op_9239_strides_0"), val = tensor([1, 1])]; + tensor var_9239_pad_0 = const()[name = string("op_9239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9239_dilations_0 = const()[name = string("op_9239_dilations_0"), val = tensor([1, 1])]; + int32 var_9239_groups_0 = const()[name = string("op_9239_groups_0"), val = int32(1)]; + tensor var_9239 = conv(dilations = var_9239_dilations_0, groups = var_9239_groups_0, pad = var_9239_pad_0, pad_type = var_9239_pad_type_0, strides = var_9239_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_9201_cast_fp16)[name = string("op_9239")]; + tensor var_9244 = const()[name = string("op_9244"), val = tensor([1, 1, 1, 256])]; + tensor var_9245 = reshape(shape = var_9244, x = var_9239)[name = string("op_9245")]; + string var_9261_pad_type_0 = const()[name = string("op_9261_pad_type_0"), val = string("valid")]; + tensor var_9261_strides_0 = const()[name = string("op_9261_strides_0"), val = tensor([1, 1])]; + tensor var_9261_pad_0 = const()[name = string("op_9261_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9261_dilations_0 = const()[name = string("op_9261_dilations_0"), val = tensor([1, 1])]; + int32 var_9261_groups_0 = const()[name = string("op_9261_groups_0"), val = int32(1)]; + tensor var_9261 = conv(dilations = var_9261_dilations_0, groups = var_9261_groups_0, pad = var_9261_pad_0, pad_type = var_9261_pad_type_0, strides = var_9261_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_9201_cast_fp16)[name = string("op_9261")]; + tensor var_9266 = const()[name = string("op_9266"), val = tensor([1, 1, 1, 256])]; + tensor var_9267 = reshape(shape = var_9266, x = var_9261)[name = string("op_9267")]; + int32 var_9282 = const()[name = string("op_9282"), val = int32(-1)]; + fp16 const_460_promoted = const()[name = string("const_460_promoted"), val = fp16(-0x1p+0)]; + tensor var_9284 = mul(x = var_9223, y = const_460_promoted)[name = string("op_9284")]; + bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; + tensor input_245 = concat(axis = var_9282, interleave = input_245_interleave_0, values = (var_9223, var_9284))[name = string("input_245")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_9279_to_fp16 = const()[name = string("op_9279_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_9279_to_fp16, x = input_245)[name = string("normed_293_cast_fp16")]; + tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; + tensor var_9298_to_fp16 = const()[name = string("op_9298_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620051456)))]; + tensor q_25_cast_fp16 = mul(x = normed_295, y = var_9298_to_fp16)[name = string("q_25_cast_fp16")]; + int32 var_9309 = const()[name = string("op_9309"), val = int32(-1)]; + fp16 const_464_promoted = const()[name = string("const_464_promoted"), val = fp16(-0x1p+0)]; + tensor var_9311 = mul(x = var_9245, y = const_464_promoted)[name = string("op_9311")]; + bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; + tensor input_247 = concat(axis = var_9309, interleave = input_247_interleave_0, values = (var_9245, var_9311))[name = string("input_247")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_9306_to_fp16 = const()[name = string("op_9306_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_9306_to_fp16, x = input_247)[name = string("normed_297_cast_fp16")]; + tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; + tensor var_9325_to_fp16 = const()[name = string("op_9325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620052032)))]; + tensor k_25_cast_fp16 = mul(x = normed_299, y = var_9325_to_fp16)[name = string("k_25_cast_fp16")]; + tensor var_9327_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9327_cast_fp16")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; + fp16 const_470_promoted_to_fp16 = const()[name = string("const_470_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9348_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_470_promoted_to_fp16)[name = string("op_9348_cast_fp16")]; + int32 var_9350 = const()[name = string("op_9350"), val = int32(-1)]; + bool var_9351_interleave_0 = const()[name = string("op_9351_interleave_0"), val = bool(false)]; + tensor var_9351_cast_fp16 = concat(axis = var_9350, interleave = var_9351_interleave_0, values = (var_9348_cast_fp16, x1_49_cast_fp16))[name = string("op_9351_cast_fp16")]; + tensor var_9352_cast_fp16 = mul(x = var_9351_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9352_cast_fp16")]; + tensor query_states_49_cast_fp16 = add(x = var_9327_cast_fp16, y = var_9352_cast_fp16)[name = string("query_states_49_cast_fp16")]; + tensor var_9355_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9355_cast_fp16")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; + fp16 const_473_promoted_to_fp16 = const()[name = string("const_473_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9376_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_473_promoted_to_fp16)[name = string("op_9376_cast_fp16")]; + int32 var_9378 = const()[name = string("op_9378"), val = int32(-1)]; + bool var_9379_interleave_0 = const()[name = string("op_9379_interleave_0"), val = bool(false)]; + tensor var_9379_cast_fp16 = concat(axis = var_9378, interleave = var_9379_interleave_0, values = (var_9376_cast_fp16, x1_51_cast_fp16))[name = string("op_9379_cast_fp16")]; + tensor var_9380_cast_fp16 = mul(x = var_9379_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9380_cast_fp16")]; + tensor key_states_49_cast_fp16 = add(x = var_9355_cast_fp16, y = var_9380_cast_fp16)[name = string("key_states_49_cast_fp16")]; + tensor key_slice_21_begin_0 = const()[name = string("key_slice_21_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor key_slice_21_end_0 = const()[name = string("key_slice_21_end_0"), val = tensor([11, 1, 512, 256])]; + tensor key_slice_21_end_mask_0 = const()[name = string("key_slice_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_21_cast_fp16 = slice_by_index(begin = key_slice_21_begin_0, end = key_slice_21_end_0, end_mask = key_slice_21_end_mask_0, x = coreml_update_state_73)[name = string("key_slice_21_cast_fp16")]; + tensor key_tail_21_begin_0 = const()[name = string("key_tail_21_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_21_end_0 = const()[name = string("key_tail_21_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_21_cast_fp16 = slice_by_index(begin = key_tail_21_begin_0, end = key_tail_21_end_0, x = key_slice_21_cast_fp16)[name = string("key_tail_21_cast_fp16")]; + int32 var_9393 = const()[name = string("op_9393"), val = int32(2)]; + bool shifted_key_21_interleave_0 = const()[name = string("shifted_key_21_interleave_0"), val = bool(false)]; + tensor shifted_key_21_cast_fp16 = concat(axis = var_9393, interleave = shifted_key_21_interleave_0, values = (key_tail_21_cast_fp16, key_states_49_cast_fp16))[name = string("shifted_key_21_cast_fp16")]; + tensor concat_56 = const()[name = string("concat_56"), val = tensor([10, 0, 0, 0])]; + tensor concat_57 = const()[name = string("concat_57"), val = tensor([11, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_21_stride_0, update = shifted_key_21_cast_fp16, x = coreml_update_state_73)[name = string("model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_76")]; + tensor value_slice_21_begin_0 = const()[name = string("value_slice_21_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor value_slice_21_end_0 = const()[name = string("value_slice_21_end_0"), val = tensor([33, 1, 512, 256])]; + tensor value_slice_21_end_mask_0 = const()[name = string("value_slice_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_21_cast_fp16 = slice_by_index(begin = value_slice_21_begin_0, end = value_slice_21_end_0, end_mask = value_slice_21_end_mask_0, x = coreml_update_state_76)[name = string("value_slice_21_cast_fp16")]; + tensor value_tail_21_begin_0 = const()[name = string("value_tail_21_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_21_end_0 = const()[name = string("value_tail_21_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_21_cast_fp16 = slice_by_index(begin = value_tail_21_begin_0, end = value_tail_21_end_0, x = value_slice_21_cast_fp16)[name = string("value_tail_21_cast_fp16")]; + int32 var_9427 = const()[name = string("op_9427"), val = int32(2)]; + bool shifted_value_21_interleave_0 = const()[name = string("shifted_value_21_interleave_0"), val = bool(false)]; + tensor shifted_value_21_cast_fp16 = concat(axis = var_9427, interleave = shifted_value_21_interleave_0, values = (value_tail_21_cast_fp16, var_9267))[name = string("shifted_value_21_cast_fp16")]; + tensor concat_58 = const()[name = string("concat_58"), val = tensor([32, 0, 0, 0])]; + tensor concat_59 = const()[name = string("concat_59"), val = tensor([33, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_22_stride_0, update = shifted_value_21_cast_fp16, x = coreml_update_state_76)[name = string("model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_77")]; + tensor var_9455_begin_0 = const()[name = string("op_9455_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor var_9455_end_0 = const()[name = string("op_9455_end_0"), val = tensor([11, 1, 512, 256])]; + tensor var_9455_end_mask_0 = const()[name = string("op_9455_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9455_cast_fp16 = slice_by_index(begin = var_9455_begin_0, end = var_9455_end_0, end_mask = var_9455_end_mask_0, x = coreml_update_state_77)[name = string("op_9455_cast_fp16")]; + tensor var_9462_begin_0 = const()[name = string("op_9462_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_9462_end_0 = const()[name = string("op_9462_end_0"), val = tensor([33, 1, 512, 256])]; + tensor var_9462_end_mask_0 = const()[name = string("op_9462_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9462_cast_fp16 = slice_by_index(begin = var_9462_begin_0, end = var_9462_end_0, end_mask = var_9462_end_mask_0, x = coreml_update_state_77)[name = string("op_9462_cast_fp16")]; + tensor var_9499 = const()[name = string("op_9499"), val = tensor([1, 4, 1, 1])]; + tensor x_197_cast_fp16 = tile(reps = var_9499, x = var_9455_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_9519 = const()[name = string("op_9519"), val = tensor([1, 4, 1, 1])]; + tensor x_203_cast_fp16 = tile(reps = var_9519, x = var_9462_cast_fp16)[name = string("x_203_cast_fp16")]; + bool var_9546_transpose_x_1 = const()[name = string("op_9546_transpose_x_1"), val = bool(false)]; + bool var_9546_transpose_y_1 = const()[name = string("op_9546_transpose_y_1"), val = bool(true)]; + tensor var_9546 = matmul(transpose_x = var_9546_transpose_x_1, transpose_y = var_9546_transpose_y_1, x = query_states_49_cast_fp16, y = x_197_cast_fp16)[name = string("op_9546")]; + fp16 var_9547_to_fp16 = const()[name = string("op_9547_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_73_cast_fp16 = mul(x = var_9546, y = var_9547_to_fp16)[name = string("attn_weights_73_cast_fp16")]; + tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = var_2105)[name = string("attn_weights_75_cast_fp16")]; + int32 var_9582 = const()[name = string("op_9582"), val = int32(-1)]; + tensor attn_weights_77_cast_fp16 = softmax(axis = var_9582, x = attn_weights_75_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; + bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; + tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = attn_weights_77_cast_fp16, y = x_203_cast_fp16)[name = string("attn_output_121_cast_fp16")]; + tensor var_9593_perm_0 = const()[name = string("op_9593_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9597 = const()[name = string("op_9597"), val = tensor([1, 1, 1024])]; + tensor var_9593_cast_fp16 = transpose(perm = var_9593_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_99")]; + tensor attn_output_125_cast_fp16 = reshape(shape = var_9597, x = var_9593_cast_fp16)[name = string("attn_output_125_cast_fp16")]; + tensor var_9602 = const()[name = string("op_9602"), val = tensor([0, 2, 1])]; + string var_9618_pad_type_0 = const()[name = string("op_9618_pad_type_0"), val = string("valid")]; + int32 var_9618_groups_0 = const()[name = string("op_9618_groups_0"), val = int32(1)]; + tensor var_9618_strides_0 = const()[name = string("op_9618_strides_0"), val = tensor([1])]; + tensor var_9618_pad_0 = const()[name = string("op_9618_pad_0"), val = tensor([0, 0])]; + tensor var_9618_dilations_0 = const()[name = string("op_9618_dilations_0"), val = tensor([1])]; + tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620052608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620937408))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9603_cast_fp16 = transpose(perm = var_9602, x = attn_output_125_cast_fp16)[name = string("transpose_98")]; + tensor var_9618_cast_fp16 = conv(dilations = var_9618_dilations_0, groups = var_9618_groups_0, pad = var_9618_pad_0, pad_type = var_9618_pad_type_0, strides = var_9618_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_9603_cast_fp16)[name = string("op_9618_cast_fp16")]; + tensor var_9622 = const()[name = string("op_9622"), val = tensor([0, 2, 1])]; + int32 var_9633 = const()[name = string("op_9633"), val = int32(-1)]; + fp16 const_482_promoted_to_fp16 = const()[name = string("const_482_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_205_cast_fp16 = transpose(perm = var_9622, x = var_9618_cast_fp16)[name = string("transpose_97")]; + tensor var_9635_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = const_482_promoted_to_fp16)[name = string("op_9635_cast_fp16")]; + bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; + tensor input_251_cast_fp16 = concat(axis = var_9633, interleave = input_251_interleave_0, values = (hidden_states_205_cast_fp16, var_9635_cast_fp16))[name = string("input_251_cast_fp16")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_9630_to_fp16 = const()[name = string("op_9630_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_9630_to_fp16, x = input_251_cast_fp16)[name = string("normed_301_cast_fp16")]; + tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; + tensor var_9649_to_fp16 = const()[name = string("op_9649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620974336)))]; + tensor attn_output_129_cast_fp16 = mul(x = normed_303_cast_fp16, y = var_9649_to_fp16)[name = string("attn_output_129_cast_fp16")]; + tensor hidden_states_207_cast_fp16 = add(x = hidden_states_197_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_207_cast_fp16")]; + int32 var_9662 = const()[name = string("op_9662"), val = int32(-1)]; + fp16 const_486_promoted_to_fp16 = const()[name = string("const_486_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9664_cast_fp16 = mul(x = hidden_states_207_cast_fp16, y = const_486_promoted_to_fp16)[name = string("op_9664_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_9662, interleave = input_253_interleave_0, values = (hidden_states_207_cast_fp16, var_9664_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_9659_to_fp16 = const()[name = string("op_9659_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_9659_to_fp16, x = input_253_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; + tensor var_9678_to_fp16 = const()[name = string("op_9678_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620976704)))]; + tensor x_205_cast_fp16 = mul(x = normed_307_cast_fp16, y = var_9678_to_fp16)[name = string("x_205_cast_fp16")]; + tensor var_9690 = const()[name = string("op_9690"), val = tensor([0, 2, 1])]; + tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; + tensor var_9691_cast_fp16 = transpose(perm = var_9690, x = x_205_cast_fp16)[name = string("transpose_96")]; + tensor input_255_cast_fp16 = expand_dims(axes = input_255_axes_0, x = var_9691_cast_fp16)[name = string("input_255_cast_fp16")]; + string x_207_pad_type_0 = const()[name = string("x_207_pad_type_0"), val = string("valid")]; + tensor x_207_strides_0 = const()[name = string("x_207_strides_0"), val = tensor([1, 1])]; + tensor x_207_pad_0 = const()[name = string("x_207_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_207_dilations_0 = const()[name = string("x_207_dilations_0"), val = tensor([1, 1])]; + int32 x_207_groups_0 = const()[name = string("x_207_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620979072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(626951104))))[name = string("model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_207_cast_fp16 = conv(dilations = x_207_dilations_0, groups = x_207_groups_0, pad = x_207_pad_0, pad_type = x_207_pad_type_0, strides = x_207_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("x_207_cast_fp16")]; + string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; + tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; + tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; + int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627172352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633144384))))[name = string("model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_25_cast_fp16 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("b_25_cast_fp16")]; + string var_9716_mode_0 = const()[name = string("op_9716_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_9716_cast_fp16 = gelu(mode = var_9716_mode_0, x = x_207_cast_fp16)[name = string("op_9716_cast_fp16")]; + tensor input_257_cast_fp16 = mul(x = var_9716_cast_fp16, y = b_25_cast_fp16)[name = string("input_257_cast_fp16")]; + string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; + tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; + tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; + int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633365632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639337664))))[name = string("model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_25_cast_fp16 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_257_cast_fp16)[name = string("e_25_cast_fp16")]; + tensor var_9724_axes_0 = const()[name = string("op_9724_axes_0"), val = tensor([2])]; + tensor var_9724_cast_fp16 = squeeze(axes = var_9724_axes_0, x = e_25_cast_fp16)[name = string("op_9724_cast_fp16")]; + tensor var_9725 = const()[name = string("op_9725"), val = tensor([0, 2, 1])]; + int32 var_9736 = const()[name = string("op_9736"), val = int32(-1)]; + fp16 const_490_promoted_to_fp16 = const()[name = string("const_490_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_209_cast_fp16 = transpose(perm = var_9725, x = var_9724_cast_fp16)[name = string("transpose_95")]; + tensor var_9738_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_490_promoted_to_fp16)[name = string("op_9738_cast_fp16")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259_cast_fp16 = concat(axis = var_9736, interleave = input_259_interleave_0, values = (hidden_states_209_cast_fp16, var_9738_cast_fp16))[name = string("input_259_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_9733_to_fp16 = const()[name = string("op_9733_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_9733_to_fp16, x = input_259_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_311_cast_fp16 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311_cast_fp16")]; + tensor var_9752_to_fp16 = const()[name = string("op_9752_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639374592)))]; + tensor hidden_states_211_cast_fp16 = mul(x = normed_311_cast_fp16, y = var_9752_to_fp16)[name = string("hidden_states_211_cast_fp16")]; + tensor hidden_states_213_cast_fp16 = add(x = hidden_states_207_cast_fp16, y = hidden_states_211_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; + int32 var_9803 = const()[name = string("op_9803"), val = int32(-1)]; + fp16 const_494_promoted_to_fp16 = const()[name = string("const_494_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9805_cast_fp16 = mul(x = hidden_states_213_cast_fp16, y = const_494_promoted_to_fp16)[name = string("op_9805_cast_fp16")]; + bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; + tensor input_261_cast_fp16 = concat(axis = var_9803, interleave = input_261_interleave_0, values = (hidden_states_213_cast_fp16, var_9805_cast_fp16))[name = string("input_261_cast_fp16")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_9800_to_fp16 = const()[name = string("op_9800_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_9800_to_fp16, x = input_261_cast_fp16)[name = string("normed_313_cast_fp16")]; + tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_315_cast_fp16 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315_cast_fp16")]; + tensor var_9819_to_fp16 = const()[name = string("op_9819_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639376960)))]; + tensor hidden_states_215_cast_fp16 = mul(x = normed_315_cast_fp16, y = var_9819_to_fp16)[name = string("hidden_states_215_cast_fp16")]; + tensor var_9824 = const()[name = string("op_9824"), val = tensor([0, 2, 1])]; + tensor var_9827_axes_0 = const()[name = string("op_9827_axes_0"), val = tensor([2])]; + tensor var_9825_cast_fp16 = transpose(perm = var_9824, x = hidden_states_215_cast_fp16)[name = string("transpose_94")]; + tensor var_9827_cast_fp16 = expand_dims(axes = var_9827_axes_0, x = var_9825_cast_fp16)[name = string("op_9827_cast_fp16")]; + string var_9843_pad_type_0 = const()[name = string("op_9843_pad_type_0"), val = string("valid")]; + tensor var_9843_strides_0 = const()[name = string("op_9843_strides_0"), val = tensor([1, 1])]; + tensor var_9843_pad_0 = const()[name = string("op_9843_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9843_dilations_0 = const()[name = string("op_9843_dilations_0"), val = tensor([1, 1])]; + int32 var_9843_groups_0 = const()[name = string("op_9843_groups_0"), val = int32(1)]; + tensor var_9843 = conv(dilations = var_9843_dilations_0, groups = var_9843_groups_0, pad = var_9843_pad_0, pad_type = var_9843_pad_type_0, strides = var_9843_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_9827_cast_fp16)[name = string("op_9843")]; + tensor var_9848 = const()[name = string("op_9848"), val = tensor([1, 4, 1, 256])]; + tensor var_9849 = reshape(shape = var_9848, x = var_9843)[name = string("op_9849")]; + string var_9865_pad_type_0 = const()[name = string("op_9865_pad_type_0"), val = string("valid")]; + tensor var_9865_strides_0 = const()[name = string("op_9865_strides_0"), val = tensor([1, 1])]; + tensor var_9865_pad_0 = const()[name = string("op_9865_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9865_dilations_0 = const()[name = string("op_9865_dilations_0"), val = tensor([1, 1])]; + int32 var_9865_groups_0 = const()[name = string("op_9865_groups_0"), val = int32(1)]; + tensor var_9865 = conv(dilations = var_9865_dilations_0, groups = var_9865_groups_0, pad = var_9865_pad_0, pad_type = var_9865_pad_type_0, strides = var_9865_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_9827_cast_fp16)[name = string("op_9865")]; + tensor var_9870 = const()[name = string("op_9870"), val = tensor([1, 1, 1, 256])]; + tensor var_9871 = reshape(shape = var_9870, x = var_9865)[name = string("op_9871")]; + string var_9887_pad_type_0 = const()[name = string("op_9887_pad_type_0"), val = string("valid")]; + tensor var_9887_strides_0 = const()[name = string("op_9887_strides_0"), val = tensor([1, 1])]; + tensor var_9887_pad_0 = const()[name = string("op_9887_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9887_dilations_0 = const()[name = string("op_9887_dilations_0"), val = tensor([1, 1])]; + int32 var_9887_groups_0 = const()[name = string("op_9887_groups_0"), val = int32(1)]; + tensor var_9887 = conv(dilations = var_9887_dilations_0, groups = var_9887_groups_0, pad = var_9887_pad_0, pad_type = var_9887_pad_type_0, strides = var_9887_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_9827_cast_fp16)[name = string("op_9887")]; + tensor var_9892 = const()[name = string("op_9892"), val = tensor([1, 1, 1, 256])]; + tensor var_9893 = reshape(shape = var_9892, x = var_9887)[name = string("op_9893")]; + int32 var_9908 = const()[name = string("op_9908"), val = int32(-1)]; + fp16 const_498_promoted = const()[name = string("const_498_promoted"), val = fp16(-0x1p+0)]; + tensor var_9910 = mul(x = var_9849, y = const_498_promoted)[name = string("op_9910")]; + bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; + tensor input_265 = concat(axis = var_9908, interleave = input_265_interleave_0, values = (var_9849, var_9910))[name = string("input_265")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_9905_to_fp16 = const()[name = string("op_9905_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_9905_to_fp16, x = input_265)[name = string("normed_317_cast_fp16")]; + tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_319 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319")]; + tensor var_9924_to_fp16 = const()[name = string("op_9924_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639379328)))]; + tensor q_27_cast_fp16 = mul(x = normed_319, y = var_9924_to_fp16)[name = string("q_27_cast_fp16")]; + int32 var_9935 = const()[name = string("op_9935"), val = int32(-1)]; + fp16 const_502_promoted = const()[name = string("const_502_promoted"), val = fp16(-0x1p+0)]; + tensor var_9937 = mul(x = var_9871, y = const_502_promoted)[name = string("op_9937")]; + bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; + tensor input_267 = concat(axis = var_9935, interleave = input_267_interleave_0, values = (var_9871, var_9937))[name = string("input_267")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_9932_to_fp16 = const()[name = string("op_9932_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_9932_to_fp16, x = input_267)[name = string("normed_321_cast_fp16")]; + tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_323 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323")]; + tensor var_9951_to_fp16 = const()[name = string("op_9951_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639379904)))]; + tensor k_27_cast_fp16 = mul(x = normed_323, y = var_9951_to_fp16)[name = string("k_27_cast_fp16")]; + tensor var_9953_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9953_cast_fp16")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; + fp16 const_508_promoted_to_fp16 = const()[name = string("const_508_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9974_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_508_promoted_to_fp16)[name = string("op_9974_cast_fp16")]; + int32 var_9976 = const()[name = string("op_9976"), val = int32(-1)]; + bool var_9977_interleave_0 = const()[name = string("op_9977_interleave_0"), val = bool(false)]; + tensor var_9977_cast_fp16 = concat(axis = var_9976, interleave = var_9977_interleave_0, values = (var_9974_cast_fp16, x1_53_cast_fp16))[name = string("op_9977_cast_fp16")]; + tensor var_9978_cast_fp16 = mul(x = var_9977_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9978_cast_fp16")]; + tensor query_states_53_cast_fp16 = add(x = var_9953_cast_fp16, y = var_9978_cast_fp16)[name = string("query_states_53_cast_fp16")]; + tensor var_9981_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9981_cast_fp16")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; + fp16 const_511_promoted_to_fp16 = const()[name = string("const_511_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10002_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_511_promoted_to_fp16)[name = string("op_10002_cast_fp16")]; + int32 var_10004 = const()[name = string("op_10004"), val = int32(-1)]; + bool var_10005_interleave_0 = const()[name = string("op_10005_interleave_0"), val = bool(false)]; + tensor var_10005_cast_fp16 = concat(axis = var_10004, interleave = var_10005_interleave_0, values = (var_10002_cast_fp16, x1_55_cast_fp16))[name = string("op_10005_cast_fp16")]; + tensor var_10006_cast_fp16 = mul(x = var_10005_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10006_cast_fp16")]; + tensor key_states_53_cast_fp16 = add(x = var_9981_cast_fp16, y = var_10006_cast_fp16)[name = string("key_states_53_cast_fp16")]; + tensor key_slice_23_begin_0 = const()[name = string("key_slice_23_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor key_slice_23_end_0 = const()[name = string("key_slice_23_end_0"), val = tensor([12, 1, 512, 256])]; + tensor key_slice_23_end_mask_0 = const()[name = string("key_slice_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_23_cast_fp16 = slice_by_index(begin = key_slice_23_begin_0, end = key_slice_23_end_0, end_mask = key_slice_23_end_mask_0, x = coreml_update_state_77)[name = string("key_slice_23_cast_fp16")]; + tensor key_tail_23_begin_0 = const()[name = string("key_tail_23_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_23_end_0 = const()[name = string("key_tail_23_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_23_cast_fp16 = slice_by_index(begin = key_tail_23_begin_0, end = key_tail_23_end_0, x = key_slice_23_cast_fp16)[name = string("key_tail_23_cast_fp16")]; + int32 var_10019 = const()[name = string("op_10019"), val = int32(2)]; + bool shifted_key_23_interleave_0 = const()[name = string("shifted_key_23_interleave_0"), val = bool(false)]; + tensor shifted_key_23_cast_fp16 = concat(axis = var_10019, interleave = shifted_key_23_interleave_0, values = (key_tail_23_cast_fp16, key_states_53_cast_fp16))[name = string("shifted_key_23_cast_fp16")]; + tensor concat_60 = const()[name = string("concat_60"), val = tensor([11, 0, 0, 0])]; + tensor concat_61 = const()[name = string("concat_61"), val = tensor([12, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_23_stride_0, update = shifted_key_23_cast_fp16, x = coreml_update_state_77)[name = string("model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_78")]; + tensor value_slice_23_begin_0 = const()[name = string("value_slice_23_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor value_slice_23_end_0 = const()[name = string("value_slice_23_end_0"), val = tensor([34, 1, 512, 256])]; + tensor value_slice_23_end_mask_0 = const()[name = string("value_slice_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_23_cast_fp16 = slice_by_index(begin = value_slice_23_begin_0, end = value_slice_23_end_0, end_mask = value_slice_23_end_mask_0, x = coreml_update_state_78)[name = string("value_slice_23_cast_fp16")]; + tensor value_tail_23_begin_0 = const()[name = string("value_tail_23_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_23_end_0 = const()[name = string("value_tail_23_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_23_cast_fp16 = slice_by_index(begin = value_tail_23_begin_0, end = value_tail_23_end_0, x = value_slice_23_cast_fp16)[name = string("value_tail_23_cast_fp16")]; + int32 var_10053 = const()[name = string("op_10053"), val = int32(2)]; + bool shifted_value_23_interleave_0 = const()[name = string("shifted_value_23_interleave_0"), val = bool(false)]; + tensor shifted_value_23_cast_fp16 = concat(axis = var_10053, interleave = shifted_value_23_interleave_0, values = (value_tail_23_cast_fp16, var_9893))[name = string("shifted_value_23_cast_fp16")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([33, 0, 0, 0])]; + tensor concat_63 = const()[name = string("concat_63"), val = tensor([34, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_24_stride_0, update = shifted_value_23_cast_fp16, x = coreml_update_state_78)[name = string("model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_79")]; + tensor var_10081_begin_0 = const()[name = string("op_10081_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor var_10081_end_0 = const()[name = string("op_10081_end_0"), val = tensor([12, 1, 512, 256])]; + tensor var_10081_end_mask_0 = const()[name = string("op_10081_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10081_cast_fp16 = slice_by_index(begin = var_10081_begin_0, end = var_10081_end_0, end_mask = var_10081_end_mask_0, x = coreml_update_state_79)[name = string("op_10081_cast_fp16")]; + tensor var_10088_begin_0 = const()[name = string("op_10088_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_10088_end_0 = const()[name = string("op_10088_end_0"), val = tensor([34, 1, 512, 256])]; + tensor var_10088_end_mask_0 = const()[name = string("op_10088_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10088_cast_fp16 = slice_by_index(begin = var_10088_begin_0, end = var_10088_end_0, end_mask = var_10088_end_mask_0, x = coreml_update_state_79)[name = string("op_10088_cast_fp16")]; + tensor var_10125 = const()[name = string("op_10125"), val = tensor([1, 4, 1, 1])]; + tensor x_213_cast_fp16 = tile(reps = var_10125, x = var_10081_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_10145 = const()[name = string("op_10145"), val = tensor([1, 4, 1, 1])]; + tensor x_219_cast_fp16 = tile(reps = var_10145, x = var_10088_cast_fp16)[name = string("x_219_cast_fp16")]; + bool var_10172_transpose_x_1 = const()[name = string("op_10172_transpose_x_1"), val = bool(false)]; + bool var_10172_transpose_y_1 = const()[name = string("op_10172_transpose_y_1"), val = bool(true)]; + tensor var_10172 = matmul(transpose_x = var_10172_transpose_x_1, transpose_y = var_10172_transpose_y_1, x = query_states_53_cast_fp16, y = x_213_cast_fp16)[name = string("op_10172")]; + fp16 var_10173_to_fp16 = const()[name = string("op_10173_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_79_cast_fp16 = mul(x = var_10172, y = var_10173_to_fp16)[name = string("attn_weights_79_cast_fp16")]; + tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = var_2105)[name = string("attn_weights_81_cast_fp16")]; + int32 var_10208 = const()[name = string("op_10208"), val = int32(-1)]; + tensor attn_weights_83_cast_fp16 = softmax(axis = var_10208, x = attn_weights_81_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; + bool attn_output_131_transpose_x_0 = const()[name = string("attn_output_131_transpose_x_0"), val = bool(false)]; + bool attn_output_131_transpose_y_0 = const()[name = string("attn_output_131_transpose_y_0"), val = bool(false)]; + tensor attn_output_131_cast_fp16 = matmul(transpose_x = attn_output_131_transpose_x_0, transpose_y = attn_output_131_transpose_y_0, x = attn_weights_83_cast_fp16, y = x_219_cast_fp16)[name = string("attn_output_131_cast_fp16")]; + tensor var_10219_perm_0 = const()[name = string("op_10219_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10223 = const()[name = string("op_10223"), val = tensor([1, 1, 1024])]; + tensor var_10219_cast_fp16 = transpose(perm = var_10219_perm_0, x = attn_output_131_cast_fp16)[name = string("transpose_93")]; + tensor attn_output_135_cast_fp16 = reshape(shape = var_10223, x = var_10219_cast_fp16)[name = string("attn_output_135_cast_fp16")]; + tensor var_10228 = const()[name = string("op_10228"), val = tensor([0, 2, 1])]; + string var_10244_pad_type_0 = const()[name = string("op_10244_pad_type_0"), val = string("valid")]; + int32 var_10244_groups_0 = const()[name = string("op_10244_groups_0"), val = int32(1)]; + tensor var_10244_strides_0 = const()[name = string("op_10244_strides_0"), val = tensor([1])]; + tensor var_10244_pad_0 = const()[name = string("op_10244_pad_0"), val = tensor([0, 0])]; + tensor var_10244_dilations_0 = const()[name = string("op_10244_dilations_0"), val = tensor([1])]; + tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639380480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640265280))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10229_cast_fp16 = transpose(perm = var_10228, x = attn_output_135_cast_fp16)[name = string("transpose_92")]; + tensor var_10244_cast_fp16 = conv(dilations = var_10244_dilations_0, groups = var_10244_groups_0, pad = var_10244_pad_0, pad_type = var_10244_pad_type_0, strides = var_10244_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_10229_cast_fp16)[name = string("op_10244_cast_fp16")]; + tensor var_10248 = const()[name = string("op_10248"), val = tensor([0, 2, 1])]; + int32 var_10259 = const()[name = string("op_10259"), val = int32(-1)]; + fp16 const_520_promoted_to_fp16 = const()[name = string("const_520_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_221_cast_fp16 = transpose(perm = var_10248, x = var_10244_cast_fp16)[name = string("transpose_91")]; + tensor var_10261_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_520_promoted_to_fp16)[name = string("op_10261_cast_fp16")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271_cast_fp16 = concat(axis = var_10259, interleave = input_271_interleave_0, values = (hidden_states_221_cast_fp16, var_10261_cast_fp16))[name = string("input_271_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_10256_to_fp16 = const()[name = string("op_10256_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_10256_to_fp16, x = input_271_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_327_cast_fp16 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327_cast_fp16")]; + tensor var_10275_to_fp16 = const()[name = string("op_10275_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640302208)))]; + tensor attn_output_139_cast_fp16 = mul(x = normed_327_cast_fp16, y = var_10275_to_fp16)[name = string("attn_output_139_cast_fp16")]; + tensor hidden_states_223_cast_fp16 = add(x = hidden_states_213_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_223_cast_fp16")]; + int32 var_10288 = const()[name = string("op_10288"), val = int32(-1)]; + fp16 const_524_promoted_to_fp16 = const()[name = string("const_524_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10290_cast_fp16 = mul(x = hidden_states_223_cast_fp16, y = const_524_promoted_to_fp16)[name = string("op_10290_cast_fp16")]; + bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; + tensor input_273_cast_fp16 = concat(axis = var_10288, interleave = input_273_interleave_0, values = (hidden_states_223_cast_fp16, var_10290_cast_fp16))[name = string("input_273_cast_fp16")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_10285_to_fp16 = const()[name = string("op_10285_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_10285_to_fp16, x = input_273_cast_fp16)[name = string("normed_329_cast_fp16")]; + tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_331_cast_fp16 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331_cast_fp16")]; + tensor var_10304_to_fp16 = const()[name = string("op_10304_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640304576)))]; + tensor x_221_cast_fp16 = mul(x = normed_331_cast_fp16, y = var_10304_to_fp16)[name = string("x_221_cast_fp16")]; + tensor var_10316 = const()[name = string("op_10316"), val = tensor([0, 2, 1])]; + tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; + tensor var_10317_cast_fp16 = transpose(perm = var_10316, x = x_221_cast_fp16)[name = string("transpose_90")]; + tensor input_275_cast_fp16 = expand_dims(axes = input_275_axes_0, x = var_10317_cast_fp16)[name = string("input_275_cast_fp16")]; + string x_223_pad_type_0 = const()[name = string("x_223_pad_type_0"), val = string("valid")]; + tensor x_223_strides_0 = const()[name = string("x_223_strides_0"), val = tensor([1, 1])]; + tensor x_223_pad_0 = const()[name = string("x_223_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_223_dilations_0 = const()[name = string("x_223_dilations_0"), val = tensor([1, 1])]; + int32 x_223_groups_0 = const()[name = string("x_223_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640306944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646278976))))[name = string("model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_223_cast_fp16 = conv(dilations = x_223_dilations_0, groups = x_223_groups_0, pad = x_223_pad_0, pad_type = x_223_pad_type_0, strides = x_223_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("x_223_cast_fp16")]; + string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; + tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; + tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; + int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646500224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652472256))))[name = string("model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_27_cast_fp16 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("b_27_cast_fp16")]; + string var_10342_mode_0 = const()[name = string("op_10342_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_10342_cast_fp16 = gelu(mode = var_10342_mode_0, x = x_223_cast_fp16)[name = string("op_10342_cast_fp16")]; + tensor input_277_cast_fp16 = mul(x = var_10342_cast_fp16, y = b_27_cast_fp16)[name = string("input_277_cast_fp16")]; + string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; + tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; + tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; + int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652693504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658665536))))[name = string("model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_27_cast_fp16 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_277_cast_fp16)[name = string("e_27_cast_fp16")]; + tensor var_10350_axes_0 = const()[name = string("op_10350_axes_0"), val = tensor([2])]; + tensor var_10350_cast_fp16 = squeeze(axes = var_10350_axes_0, x = e_27_cast_fp16)[name = string("op_10350_cast_fp16")]; + tensor var_10351 = const()[name = string("op_10351"), val = tensor([0, 2, 1])]; + int32 var_10362 = const()[name = string("op_10362"), val = int32(-1)]; + fp16 const_528_promoted_to_fp16 = const()[name = string("const_528_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_225_cast_fp16 = transpose(perm = var_10351, x = var_10350_cast_fp16)[name = string("transpose_89")]; + tensor var_10364_cast_fp16 = mul(x = hidden_states_225_cast_fp16, y = const_528_promoted_to_fp16)[name = string("op_10364_cast_fp16")]; + bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; + tensor input_279_cast_fp16 = concat(axis = var_10362, interleave = input_279_interleave_0, values = (hidden_states_225_cast_fp16, var_10364_cast_fp16))[name = string("input_279_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_10359_to_fp16 = const()[name = string("op_10359_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_10359_to_fp16, x = input_279_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; + tensor var_10378_to_fp16 = const()[name = string("op_10378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658702464)))]; + tensor hidden_states_227_cast_fp16 = mul(x = normed_335_cast_fp16, y = var_10378_to_fp16)[name = string("hidden_states_227_cast_fp16")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_223_cast_fp16, y = hidden_states_227_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + int32 var_10429 = const()[name = string("op_10429"), val = int32(-1)]; + fp16 const_532_promoted_to_fp16 = const()[name = string("const_532_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10431_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = const_532_promoted_to_fp16)[name = string("op_10431_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_10429, interleave = input_281_interleave_0, values = (hidden_states_229_cast_fp16, var_10431_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_10426_to_fp16 = const()[name = string("op_10426_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_10426_to_fp16, x = input_281_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; + tensor var_10445_to_fp16 = const()[name = string("op_10445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658704832)))]; + tensor hidden_states_231_cast_fp16 = mul(x = normed_339_cast_fp16, y = var_10445_to_fp16)[name = string("hidden_states_231_cast_fp16")]; + tensor var_10450 = const()[name = string("op_10450"), val = tensor([0, 2, 1])]; + tensor var_10453_axes_0 = const()[name = string("op_10453_axes_0"), val = tensor([2])]; + tensor var_10451_cast_fp16 = transpose(perm = var_10450, x = hidden_states_231_cast_fp16)[name = string("transpose_88")]; + tensor var_10453_cast_fp16 = expand_dims(axes = var_10453_axes_0, x = var_10451_cast_fp16)[name = string("op_10453_cast_fp16")]; + string var_10469_pad_type_0 = const()[name = string("op_10469_pad_type_0"), val = string("valid")]; + tensor var_10469_strides_0 = const()[name = string("op_10469_strides_0"), val = tensor([1, 1])]; + tensor var_10469_pad_0 = const()[name = string("op_10469_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10469_dilations_0 = const()[name = string("op_10469_dilations_0"), val = tensor([1, 1])]; + int32 var_10469_groups_0 = const()[name = string("op_10469_groups_0"), val = int32(1)]; + tensor var_10469 = conv(dilations = var_10469_dilations_0, groups = var_10469_groups_0, pad = var_10469_pad_0, pad_type = var_10469_pad_type_0, strides = var_10469_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_10453_cast_fp16)[name = string("op_10469")]; + tensor var_10474 = const()[name = string("op_10474"), val = tensor([1, 4, 1, 256])]; + tensor var_10475 = reshape(shape = var_10474, x = var_10469)[name = string("op_10475")]; + string var_10491_pad_type_0 = const()[name = string("op_10491_pad_type_0"), val = string("valid")]; + tensor var_10491_strides_0 = const()[name = string("op_10491_strides_0"), val = tensor([1, 1])]; + tensor var_10491_pad_0 = const()[name = string("op_10491_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10491_dilations_0 = const()[name = string("op_10491_dilations_0"), val = tensor([1, 1])]; + int32 var_10491_groups_0 = const()[name = string("op_10491_groups_0"), val = int32(1)]; + tensor var_10491 = conv(dilations = var_10491_dilations_0, groups = var_10491_groups_0, pad = var_10491_pad_0, pad_type = var_10491_pad_type_0, strides = var_10491_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_10453_cast_fp16)[name = string("op_10491")]; + tensor var_10496 = const()[name = string("op_10496"), val = tensor([1, 1, 1, 256])]; + tensor var_10497 = reshape(shape = var_10496, x = var_10491)[name = string("op_10497")]; + string var_10513_pad_type_0 = const()[name = string("op_10513_pad_type_0"), val = string("valid")]; + tensor var_10513_strides_0 = const()[name = string("op_10513_strides_0"), val = tensor([1, 1])]; + tensor var_10513_pad_0 = const()[name = string("op_10513_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10513_dilations_0 = const()[name = string("op_10513_dilations_0"), val = tensor([1, 1])]; + int32 var_10513_groups_0 = const()[name = string("op_10513_groups_0"), val = int32(1)]; + tensor var_10513 = conv(dilations = var_10513_dilations_0, groups = var_10513_groups_0, pad = var_10513_pad_0, pad_type = var_10513_pad_type_0, strides = var_10513_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_10453_cast_fp16)[name = string("op_10513")]; + tensor var_10518 = const()[name = string("op_10518"), val = tensor([1, 1, 1, 256])]; + tensor var_10519 = reshape(shape = var_10518, x = var_10513)[name = string("op_10519")]; + int32 var_10534 = const()[name = string("op_10534"), val = int32(-1)]; + fp16 const_536_promoted = const()[name = string("const_536_promoted"), val = fp16(-0x1p+0)]; + tensor var_10536 = mul(x = var_10475, y = const_536_promoted)[name = string("op_10536")]; + bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; + tensor input_285 = concat(axis = var_10534, interleave = input_285_interleave_0, values = (var_10475, var_10536))[name = string("input_285")]; + tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; + fp16 var_10531_to_fp16 = const()[name = string("op_10531_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_10531_to_fp16, x = input_285)[name = string("normed_341_cast_fp16")]; + tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; + tensor var_10550_to_fp16 = const()[name = string("op_10550_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658707200)))]; + tensor q_29_cast_fp16 = mul(x = normed_343, y = var_10550_to_fp16)[name = string("q_29_cast_fp16")]; + int32 var_10561 = const()[name = string("op_10561"), val = int32(-1)]; + fp16 const_540_promoted = const()[name = string("const_540_promoted"), val = fp16(-0x1p+0)]; + tensor var_10563 = mul(x = var_10497, y = const_540_promoted)[name = string("op_10563")]; + bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; + tensor input_287 = concat(axis = var_10561, interleave = input_287_interleave_0, values = (var_10497, var_10563))[name = string("input_287")]; + tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; + fp16 var_10558_to_fp16 = const()[name = string("op_10558_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_10558_to_fp16, x = input_287)[name = string("normed_345_cast_fp16")]; + tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; + tensor var_10577_to_fp16 = const()[name = string("op_10577_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658707776)))]; + tensor k_29_cast_fp16 = mul(x = normed_347, y = var_10577_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_10579_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10579_cast_fp16")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; + fp16 const_546_promoted_to_fp16 = const()[name = string("const_546_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10600_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_546_promoted_to_fp16)[name = string("op_10600_cast_fp16")]; + int32 var_10602 = const()[name = string("op_10602"), val = int32(-1)]; + bool var_10603_interleave_0 = const()[name = string("op_10603_interleave_0"), val = bool(false)]; + tensor var_10603_cast_fp16 = concat(axis = var_10602, interleave = var_10603_interleave_0, values = (var_10600_cast_fp16, x1_57_cast_fp16))[name = string("op_10603_cast_fp16")]; + tensor var_10604_cast_fp16 = mul(x = var_10603_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10604_cast_fp16")]; + tensor query_states_57_cast_fp16 = add(x = var_10579_cast_fp16, y = var_10604_cast_fp16)[name = string("query_states_57_cast_fp16")]; + tensor var_10607_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10607_cast_fp16")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; + fp16 const_549_promoted_to_fp16 = const()[name = string("const_549_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10628_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_549_promoted_to_fp16)[name = string("op_10628_cast_fp16")]; + int32 var_10630 = const()[name = string("op_10630"), val = int32(-1)]; + bool var_10631_interleave_0 = const()[name = string("op_10631_interleave_0"), val = bool(false)]; + tensor var_10631_cast_fp16 = concat(axis = var_10630, interleave = var_10631_interleave_0, values = (var_10628_cast_fp16, x1_59_cast_fp16))[name = string("op_10631_cast_fp16")]; + tensor var_10632_cast_fp16 = mul(x = var_10631_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10632_cast_fp16")]; + tensor key_states_57_cast_fp16 = add(x = var_10607_cast_fp16, y = var_10632_cast_fp16)[name = string("key_states_57_cast_fp16")]; + tensor key_slice_25_begin_0 = const()[name = string("key_slice_25_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor key_slice_25_end_0 = const()[name = string("key_slice_25_end_0"), val = tensor([13, 1, 512, 256])]; + tensor key_slice_25_end_mask_0 = const()[name = string("key_slice_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_25_cast_fp16 = slice_by_index(begin = key_slice_25_begin_0, end = key_slice_25_end_0, end_mask = key_slice_25_end_mask_0, x = coreml_update_state_79)[name = string("key_slice_25_cast_fp16")]; + tensor key_tail_25_begin_0 = const()[name = string("key_tail_25_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_25_end_0 = const()[name = string("key_tail_25_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_25_cast_fp16 = slice_by_index(begin = key_tail_25_begin_0, end = key_tail_25_end_0, x = key_slice_25_cast_fp16)[name = string("key_tail_25_cast_fp16")]; + int32 var_10645 = const()[name = string("op_10645"), val = int32(2)]; + bool shifted_key_25_interleave_0 = const()[name = string("shifted_key_25_interleave_0"), val = bool(false)]; + tensor shifted_key_25_cast_fp16 = concat(axis = var_10645, interleave = shifted_key_25_interleave_0, values = (key_tail_25_cast_fp16, key_states_57_cast_fp16))[name = string("shifted_key_25_cast_fp16")]; + tensor concat_64 = const()[name = string("concat_64"), val = tensor([12, 0, 0, 0])]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([13, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_64, begin_mask = model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0, end = concat_65, end_mask = model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_25_stride_0, update = shifted_key_25_cast_fp16, x = coreml_update_state_79)[name = string("model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_80")]; + tensor value_slice_25_begin_0 = const()[name = string("value_slice_25_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor value_slice_25_end_0 = const()[name = string("value_slice_25_end_0"), val = tensor([35, 1, 512, 256])]; + tensor value_slice_25_end_mask_0 = const()[name = string("value_slice_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_25_cast_fp16 = slice_by_index(begin = value_slice_25_begin_0, end = value_slice_25_end_0, end_mask = value_slice_25_end_mask_0, x = coreml_update_state_80)[name = string("value_slice_25_cast_fp16")]; + tensor value_tail_25_begin_0 = const()[name = string("value_tail_25_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_25_end_0 = const()[name = string("value_tail_25_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_25_cast_fp16 = slice_by_index(begin = value_tail_25_begin_0, end = value_tail_25_end_0, x = value_slice_25_cast_fp16)[name = string("value_tail_25_cast_fp16")]; + int32 var_10679 = const()[name = string("op_10679"), val = int32(2)]; + bool shifted_value_25_interleave_0 = const()[name = string("shifted_value_25_interleave_0"), val = bool(false)]; + tensor shifted_value_25_cast_fp16 = concat(axis = var_10679, interleave = shifted_value_25_interleave_0, values = (value_tail_25_cast_fp16, var_10519))[name = string("shifted_value_25_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([34, 0, 0, 0])]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([35, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_26_stride_0, update = shifted_value_25_cast_fp16, x = coreml_update_state_80)[name = string("model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_81")]; + tensor var_10707_begin_0 = const()[name = string("op_10707_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor var_10707_end_0 = const()[name = string("op_10707_end_0"), val = tensor([13, 1, 512, 256])]; + tensor var_10707_end_mask_0 = const()[name = string("op_10707_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10707_cast_fp16 = slice_by_index(begin = var_10707_begin_0, end = var_10707_end_0, end_mask = var_10707_end_mask_0, x = coreml_update_state_81)[name = string("op_10707_cast_fp16")]; + tensor var_10714_begin_0 = const()[name = string("op_10714_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_10714_end_0 = const()[name = string("op_10714_end_0"), val = tensor([35, 1, 512, 256])]; + tensor var_10714_end_mask_0 = const()[name = string("op_10714_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10714_cast_fp16 = slice_by_index(begin = var_10714_begin_0, end = var_10714_end_0, end_mask = var_10714_end_mask_0, x = coreml_update_state_81)[name = string("op_10714_cast_fp16")]; + tensor var_10751 = const()[name = string("op_10751"), val = tensor([1, 4, 1, 1])]; + tensor x_229_cast_fp16 = tile(reps = var_10751, x = var_10707_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_10771 = const()[name = string("op_10771"), val = tensor([1, 4, 1, 1])]; + tensor x_235_cast_fp16 = tile(reps = var_10771, x = var_10714_cast_fp16)[name = string("x_235_cast_fp16")]; + bool var_10798_transpose_x_1 = const()[name = string("op_10798_transpose_x_1"), val = bool(false)]; + bool var_10798_transpose_y_1 = const()[name = string("op_10798_transpose_y_1"), val = bool(true)]; + tensor var_10798 = matmul(transpose_x = var_10798_transpose_x_1, transpose_y = var_10798_transpose_y_1, x = query_states_57_cast_fp16, y = x_229_cast_fp16)[name = string("op_10798")]; + fp16 var_10799_to_fp16 = const()[name = string("op_10799_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_85_cast_fp16 = mul(x = var_10798, y = var_10799_to_fp16)[name = string("attn_weights_85_cast_fp16")]; + tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = var_2105)[name = string("attn_weights_87_cast_fp16")]; + int32 var_10834 = const()[name = string("op_10834"), val = int32(-1)]; + tensor attn_weights_89_cast_fp16 = softmax(axis = var_10834, x = attn_weights_87_cast_fp16)[name = string("attn_weights_89_cast_fp16")]; + bool attn_output_141_transpose_x_0 = const()[name = string("attn_output_141_transpose_x_0"), val = bool(false)]; + bool attn_output_141_transpose_y_0 = const()[name = string("attn_output_141_transpose_y_0"), val = bool(false)]; + tensor attn_output_141_cast_fp16 = matmul(transpose_x = attn_output_141_transpose_x_0, transpose_y = attn_output_141_transpose_y_0, x = attn_weights_89_cast_fp16, y = x_235_cast_fp16)[name = string("attn_output_141_cast_fp16")]; + tensor var_10845_perm_0 = const()[name = string("op_10845_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10849 = const()[name = string("op_10849"), val = tensor([1, 1, 1024])]; + tensor var_10845_cast_fp16 = transpose(perm = var_10845_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_87")]; + tensor attn_output_145_cast_fp16 = reshape(shape = var_10849, x = var_10845_cast_fp16)[name = string("attn_output_145_cast_fp16")]; + tensor var_10854 = const()[name = string("op_10854"), val = tensor([0, 2, 1])]; + string var_10870_pad_type_0 = const()[name = string("op_10870_pad_type_0"), val = string("valid")]; + int32 var_10870_groups_0 = const()[name = string("op_10870_groups_0"), val = int32(1)]; + tensor var_10870_strides_0 = const()[name = string("op_10870_strides_0"), val = tensor([1])]; + tensor var_10870_pad_0 = const()[name = string("op_10870_pad_0"), val = tensor([0, 0])]; + tensor var_10870_dilations_0 = const()[name = string("op_10870_dilations_0"), val = tensor([1])]; + tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658708352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659593152))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10855_cast_fp16 = transpose(perm = var_10854, x = attn_output_145_cast_fp16)[name = string("transpose_86")]; + tensor var_10870_cast_fp16 = conv(dilations = var_10870_dilations_0, groups = var_10870_groups_0, pad = var_10870_pad_0, pad_type = var_10870_pad_type_0, strides = var_10870_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_10855_cast_fp16)[name = string("op_10870_cast_fp16")]; + tensor var_10874 = const()[name = string("op_10874"), val = tensor([0, 2, 1])]; + int32 var_10885 = const()[name = string("op_10885"), val = int32(-1)]; + fp16 const_558_promoted_to_fp16 = const()[name = string("const_558_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_237_cast_fp16 = transpose(perm = var_10874, x = var_10870_cast_fp16)[name = string("transpose_85")]; + tensor var_10887_cast_fp16 = mul(x = hidden_states_237_cast_fp16, y = const_558_promoted_to_fp16)[name = string("op_10887_cast_fp16")]; + bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; + tensor input_291_cast_fp16 = concat(axis = var_10885, interleave = input_291_interleave_0, values = (hidden_states_237_cast_fp16, var_10887_cast_fp16))[name = string("input_291_cast_fp16")]; + tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; + fp16 var_10882_to_fp16 = const()[name = string("op_10882_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_10882_to_fp16, x = input_291_cast_fp16)[name = string("normed_349_cast_fp16")]; + tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; + tensor var_10901_to_fp16 = const()[name = string("op_10901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659630080)))]; + tensor attn_output_149_cast_fp16 = mul(x = normed_351_cast_fp16, y = var_10901_to_fp16)[name = string("attn_output_149_cast_fp16")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + int32 var_10914 = const()[name = string("op_10914"), val = int32(-1)]; + fp16 const_562_promoted_to_fp16 = const()[name = string("const_562_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10916_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = const_562_promoted_to_fp16)[name = string("op_10916_cast_fp16")]; + bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; + tensor input_293_cast_fp16 = concat(axis = var_10914, interleave = input_293_interleave_0, values = (hidden_states_239_cast_fp16, var_10916_cast_fp16))[name = string("input_293_cast_fp16")]; + tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; + fp16 var_10911_to_fp16 = const()[name = string("op_10911_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_10911_to_fp16, x = input_293_cast_fp16)[name = string("normed_353_cast_fp16")]; + tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; + tensor var_10930_to_fp16 = const()[name = string("op_10930_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659632448)))]; + tensor x_237_cast_fp16 = mul(x = normed_355_cast_fp16, y = var_10930_to_fp16)[name = string("x_237_cast_fp16")]; + tensor var_10942 = const()[name = string("op_10942"), val = tensor([0, 2, 1])]; + tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; + tensor var_10943_cast_fp16 = transpose(perm = var_10942, x = x_237_cast_fp16)[name = string("transpose_84")]; + tensor input_295_cast_fp16 = expand_dims(axes = input_295_axes_0, x = var_10943_cast_fp16)[name = string("input_295_cast_fp16")]; + string x_239_pad_type_0 = const()[name = string("x_239_pad_type_0"), val = string("valid")]; + tensor x_239_strides_0 = const()[name = string("x_239_strides_0"), val = tensor([1, 1])]; + tensor x_239_pad_0 = const()[name = string("x_239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_239_dilations_0 = const()[name = string("x_239_dilations_0"), val = tensor([1, 1])]; + int32 x_239_groups_0 = const()[name = string("x_239_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659634816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665606848))))[name = string("model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_239_cast_fp16 = conv(dilations = x_239_dilations_0, groups = x_239_groups_0, pad = x_239_pad_0, pad_type = x_239_pad_type_0, strides = x_239_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("x_239_cast_fp16")]; + string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; + tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; + tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; + int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665828096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671800128))))[name = string("model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_29_cast_fp16 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("b_29_cast_fp16")]; + string var_10968_mode_0 = const()[name = string("op_10968_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_10968_cast_fp16 = gelu(mode = var_10968_mode_0, x = x_239_cast_fp16)[name = string("op_10968_cast_fp16")]; + tensor input_297_cast_fp16 = mul(x = var_10968_cast_fp16, y = b_29_cast_fp16)[name = string("input_297_cast_fp16")]; + string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; + tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; + tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; + int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(672021376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677993408))))[name = string("model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_29_cast_fp16 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("e_29_cast_fp16")]; + tensor var_10976_axes_0 = const()[name = string("op_10976_axes_0"), val = tensor([2])]; + tensor var_10976_cast_fp16 = squeeze(axes = var_10976_axes_0, x = e_29_cast_fp16)[name = string("op_10976_cast_fp16")]; + tensor var_10977 = const()[name = string("op_10977"), val = tensor([0, 2, 1])]; + int32 var_10988 = const()[name = string("op_10988"), val = int32(-1)]; + fp16 const_566_promoted_to_fp16 = const()[name = string("const_566_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_241_cast_fp16 = transpose(perm = var_10977, x = var_10976_cast_fp16)[name = string("transpose_83")]; + tensor var_10990_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_566_promoted_to_fp16)[name = string("op_10990_cast_fp16")]; + bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; + tensor input_299_cast_fp16 = concat(axis = var_10988, interleave = input_299_interleave_0, values = (hidden_states_241_cast_fp16, var_10990_cast_fp16))[name = string("input_299_cast_fp16")]; + tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; + fp16 var_10985_to_fp16 = const()[name = string("op_10985_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_10985_to_fp16, x = input_299_cast_fp16)[name = string("normed_357_cast_fp16")]; + tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_359_cast_fp16 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359_cast_fp16")]; + tensor var_11004_to_fp16 = const()[name = string("op_11004_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678030336)))]; + tensor hidden_states_243_cast_fp16 = mul(x = normed_359_cast_fp16, y = var_11004_to_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor hidden_states_245_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = hidden_states_243_cast_fp16)[name = string("hidden_states_245_cast_fp16")]; + int32 var_11055 = const()[name = string("op_11055"), val = int32(-1)]; + fp16 const_570_promoted_to_fp16 = const()[name = string("const_570_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11057_cast_fp16 = mul(x = hidden_states_245_cast_fp16, y = const_570_promoted_to_fp16)[name = string("op_11057_cast_fp16")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301_cast_fp16 = concat(axis = var_11055, interleave = input_301_interleave_0, values = (hidden_states_245_cast_fp16, var_11057_cast_fp16))[name = string("input_301_cast_fp16")]; + tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; + fp16 var_11052_to_fp16 = const()[name = string("op_11052_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_11052_to_fp16, x = input_301_cast_fp16)[name = string("normed_361_cast_fp16")]; + tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_363_cast_fp16 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363_cast_fp16")]; + tensor var_11071_to_fp16 = const()[name = string("op_11071_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678032704)))]; + tensor hidden_states_247_cast_fp16 = mul(x = normed_363_cast_fp16, y = var_11071_to_fp16)[name = string("hidden_states_247_cast_fp16")]; + tensor var_11076 = const()[name = string("op_11076"), val = tensor([0, 2, 1])]; + tensor var_11079_axes_0 = const()[name = string("op_11079_axes_0"), val = tensor([2])]; + tensor var_11077_cast_fp16 = transpose(perm = var_11076, x = hidden_states_247_cast_fp16)[name = string("transpose_82")]; + tensor var_11079_cast_fp16 = expand_dims(axes = var_11079_axes_0, x = var_11077_cast_fp16)[name = string("op_11079_cast_fp16")]; + string var_11095_pad_type_0 = const()[name = string("op_11095_pad_type_0"), val = string("valid")]; + tensor var_11095_strides_0 = const()[name = string("op_11095_strides_0"), val = tensor([1, 1])]; + tensor var_11095_pad_0 = const()[name = string("op_11095_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11095_dilations_0 = const()[name = string("op_11095_dilations_0"), val = tensor([1, 1])]; + int32 var_11095_groups_0 = const()[name = string("op_11095_groups_0"), val = int32(1)]; + tensor var_11095 = conv(dilations = var_11095_dilations_0, groups = var_11095_groups_0, pad = var_11095_pad_0, pad_type = var_11095_pad_type_0, strides = var_11095_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_11079_cast_fp16)[name = string("op_11095")]; + tensor var_11100 = const()[name = string("op_11100"), val = tensor([1, 4, 1, 256])]; + tensor var_11101 = reshape(shape = var_11100, x = var_11095)[name = string("op_11101")]; + string var_11117_pad_type_0 = const()[name = string("op_11117_pad_type_0"), val = string("valid")]; + tensor var_11117_strides_0 = const()[name = string("op_11117_strides_0"), val = tensor([1, 1])]; + tensor var_11117_pad_0 = const()[name = string("op_11117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11117_dilations_0 = const()[name = string("op_11117_dilations_0"), val = tensor([1, 1])]; + int32 var_11117_groups_0 = const()[name = string("op_11117_groups_0"), val = int32(1)]; + tensor var_11117 = conv(dilations = var_11117_dilations_0, groups = var_11117_groups_0, pad = var_11117_pad_0, pad_type = var_11117_pad_type_0, strides = var_11117_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_11079_cast_fp16)[name = string("op_11117")]; + tensor var_11122 = const()[name = string("op_11122"), val = tensor([1, 1, 1, 256])]; + tensor var_11123 = reshape(shape = var_11122, x = var_11117)[name = string("op_11123")]; + string var_11139_pad_type_0 = const()[name = string("op_11139_pad_type_0"), val = string("valid")]; + tensor var_11139_strides_0 = const()[name = string("op_11139_strides_0"), val = tensor([1, 1])]; + tensor var_11139_pad_0 = const()[name = string("op_11139_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11139_dilations_0 = const()[name = string("op_11139_dilations_0"), val = tensor([1, 1])]; + int32 var_11139_groups_0 = const()[name = string("op_11139_groups_0"), val = int32(1)]; + tensor var_11139 = conv(dilations = var_11139_dilations_0, groups = var_11139_groups_0, pad = var_11139_pad_0, pad_type = var_11139_pad_type_0, strides = var_11139_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_11079_cast_fp16)[name = string("op_11139")]; + tensor var_11144 = const()[name = string("op_11144"), val = tensor([1, 1, 1, 256])]; + tensor var_11145 = reshape(shape = var_11144, x = var_11139)[name = string("op_11145")]; + int32 var_11160 = const()[name = string("op_11160"), val = int32(-1)]; + fp16 const_574_promoted = const()[name = string("const_574_promoted"), val = fp16(-0x1p+0)]; + tensor var_11162 = mul(x = var_11101, y = const_574_promoted)[name = string("op_11162")]; + bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; + tensor input_305 = concat(axis = var_11160, interleave = input_305_interleave_0, values = (var_11101, var_11162))[name = string("input_305")]; + tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; + fp16 var_11157_to_fp16 = const()[name = string("op_11157_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_11157_to_fp16, x = input_305)[name = string("normed_365_cast_fp16")]; + tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_367 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367")]; + tensor var_11176_to_fp16 = const()[name = string("op_11176_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678035072)))]; + tensor q_31_cast_fp16 = mul(x = normed_367, y = var_11176_to_fp16)[name = string("q_31_cast_fp16")]; + int32 var_11187 = const()[name = string("op_11187"), val = int32(-1)]; + fp16 const_578_promoted = const()[name = string("const_578_promoted"), val = fp16(-0x1p+0)]; + tensor var_11189 = mul(x = var_11123, y = const_578_promoted)[name = string("op_11189")]; + bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; + tensor input_307 = concat(axis = var_11187, interleave = input_307_interleave_0, values = (var_11123, var_11189))[name = string("input_307")]; + tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; + fp16 var_11184_to_fp16 = const()[name = string("op_11184_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_11184_to_fp16, x = input_307)[name = string("normed_369_cast_fp16")]; + tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_371 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371")]; + tensor var_11203_to_fp16 = const()[name = string("op_11203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678035648)))]; + tensor k_31_cast_fp16 = mul(x = normed_371, y = var_11203_to_fp16)[name = string("k_31_cast_fp16")]; + tensor var_11205_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11205_cast_fp16")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; + fp16 const_584_promoted_to_fp16 = const()[name = string("const_584_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11226_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_584_promoted_to_fp16)[name = string("op_11226_cast_fp16")]; + int32 var_11228 = const()[name = string("op_11228"), val = int32(-1)]; + bool var_11229_interleave_0 = const()[name = string("op_11229_interleave_0"), val = bool(false)]; + tensor var_11229_cast_fp16 = concat(axis = var_11228, interleave = var_11229_interleave_0, values = (var_11226_cast_fp16, x1_61_cast_fp16))[name = string("op_11229_cast_fp16")]; + tensor var_11230_cast_fp16 = mul(x = var_11229_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11230_cast_fp16")]; + tensor query_states_61_cast_fp16 = add(x = var_11205_cast_fp16, y = var_11230_cast_fp16)[name = string("query_states_61_cast_fp16")]; + tensor var_11233_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11233_cast_fp16")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; + fp16 const_587_promoted_to_fp16 = const()[name = string("const_587_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11254_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_587_promoted_to_fp16)[name = string("op_11254_cast_fp16")]; + int32 var_11256 = const()[name = string("op_11256"), val = int32(-1)]; + bool var_11257_interleave_0 = const()[name = string("op_11257_interleave_0"), val = bool(false)]; + tensor var_11257_cast_fp16 = concat(axis = var_11256, interleave = var_11257_interleave_0, values = (var_11254_cast_fp16, x1_63_cast_fp16))[name = string("op_11257_cast_fp16")]; + tensor var_11258_cast_fp16 = mul(x = var_11257_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11258_cast_fp16")]; + tensor key_states_61_cast_fp16 = add(x = var_11233_cast_fp16, y = var_11258_cast_fp16)[name = string("key_states_61_cast_fp16")]; + tensor key_slice_27_begin_0 = const()[name = string("key_slice_27_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor key_slice_27_end_0 = const()[name = string("key_slice_27_end_0"), val = tensor([14, 1, 512, 256])]; + tensor key_slice_27_end_mask_0 = const()[name = string("key_slice_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_27_cast_fp16 = slice_by_index(begin = key_slice_27_begin_0, end = key_slice_27_end_0, end_mask = key_slice_27_end_mask_0, x = coreml_update_state_81)[name = string("key_slice_27_cast_fp16")]; + tensor key_tail_27_begin_0 = const()[name = string("key_tail_27_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_27_end_0 = const()[name = string("key_tail_27_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_27_cast_fp16 = slice_by_index(begin = key_tail_27_begin_0, end = key_tail_27_end_0, x = key_slice_27_cast_fp16)[name = string("key_tail_27_cast_fp16")]; + int32 var_11271 = const()[name = string("op_11271"), val = int32(2)]; + bool shifted_key_27_interleave_0 = const()[name = string("shifted_key_27_interleave_0"), val = bool(false)]; + tensor shifted_key_27_cast_fp16 = concat(axis = var_11271, interleave = shifted_key_27_interleave_0, values = (key_tail_27_cast_fp16, key_states_61_cast_fp16))[name = string("shifted_key_27_cast_fp16")]; + tensor concat_68 = const()[name = string("concat_68"), val = tensor([13, 0, 0, 0])]; + tensor concat_69 = const()[name = string("concat_69"), val = tensor([14, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_68, begin_mask = model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0, end = concat_69, end_mask = model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_27_stride_0, update = shifted_key_27_cast_fp16, x = coreml_update_state_81)[name = string("model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_82")]; + tensor value_slice_27_begin_0 = const()[name = string("value_slice_27_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor value_slice_27_end_0 = const()[name = string("value_slice_27_end_0"), val = tensor([36, 1, 512, 256])]; + tensor value_slice_27_end_mask_0 = const()[name = string("value_slice_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_27_cast_fp16 = slice_by_index(begin = value_slice_27_begin_0, end = value_slice_27_end_0, end_mask = value_slice_27_end_mask_0, x = coreml_update_state_82)[name = string("value_slice_27_cast_fp16")]; + tensor value_tail_27_begin_0 = const()[name = string("value_tail_27_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_27_end_0 = const()[name = string("value_tail_27_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_27_cast_fp16 = slice_by_index(begin = value_tail_27_begin_0, end = value_tail_27_end_0, x = value_slice_27_cast_fp16)[name = string("value_tail_27_cast_fp16")]; + int32 var_11305 = const()[name = string("op_11305"), val = int32(2)]; + bool shifted_value_27_interleave_0 = const()[name = string("shifted_value_27_interleave_0"), val = bool(false)]; + tensor shifted_value_27_cast_fp16 = concat(axis = var_11305, interleave = shifted_value_27_interleave_0, values = (value_tail_27_cast_fp16, var_11145))[name = string("shifted_value_27_cast_fp16")]; + tensor concat_70 = const()[name = string("concat_70"), val = tensor([35, 0, 0, 0])]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([36, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_28_stride_0, update = shifted_value_27_cast_fp16, x = coreml_update_state_82)[name = string("model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_83")]; + tensor var_11333_begin_0 = const()[name = string("op_11333_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor var_11333_end_0 = const()[name = string("op_11333_end_0"), val = tensor([14, 1, 512, 256])]; + tensor var_11333_end_mask_0 = const()[name = string("op_11333_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11333_cast_fp16 = slice_by_index(begin = var_11333_begin_0, end = var_11333_end_0, end_mask = var_11333_end_mask_0, x = coreml_update_state_83)[name = string("op_11333_cast_fp16")]; + tensor var_11340_begin_0 = const()[name = string("op_11340_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_11340_end_0 = const()[name = string("op_11340_end_0"), val = tensor([36, 1, 512, 256])]; + tensor var_11340_end_mask_0 = const()[name = string("op_11340_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11340_cast_fp16 = slice_by_index(begin = var_11340_begin_0, end = var_11340_end_0, end_mask = var_11340_end_mask_0, x = coreml_update_state_83)[name = string("op_11340_cast_fp16")]; + tensor var_11377 = const()[name = string("op_11377"), val = tensor([1, 4, 1, 1])]; + tensor x_245_cast_fp16 = tile(reps = var_11377, x = var_11333_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_11397 = const()[name = string("op_11397"), val = tensor([1, 4, 1, 1])]; + tensor x_251_cast_fp16 = tile(reps = var_11397, x = var_11340_cast_fp16)[name = string("x_251_cast_fp16")]; + bool var_11424_transpose_x_1 = const()[name = string("op_11424_transpose_x_1"), val = bool(false)]; + bool var_11424_transpose_y_1 = const()[name = string("op_11424_transpose_y_1"), val = bool(true)]; + tensor var_11424 = matmul(transpose_x = var_11424_transpose_x_1, transpose_y = var_11424_transpose_y_1, x = query_states_61_cast_fp16, y = x_245_cast_fp16)[name = string("op_11424")]; + fp16 var_11425_to_fp16 = const()[name = string("op_11425_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_91_cast_fp16 = mul(x = var_11424, y = var_11425_to_fp16)[name = string("attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = var_2105)[name = string("attn_weights_93_cast_fp16")]; + int32 var_11460 = const()[name = string("op_11460"), val = int32(-1)]; + tensor attn_weights_95_cast_fp16 = softmax(axis = var_11460, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; + bool attn_output_151_transpose_x_0 = const()[name = string("attn_output_151_transpose_x_0"), val = bool(false)]; + bool attn_output_151_transpose_y_0 = const()[name = string("attn_output_151_transpose_y_0"), val = bool(false)]; + tensor attn_output_151_cast_fp16 = matmul(transpose_x = attn_output_151_transpose_x_0, transpose_y = attn_output_151_transpose_y_0, x = attn_weights_95_cast_fp16, y = x_251_cast_fp16)[name = string("attn_output_151_cast_fp16")]; + tensor var_11471_perm_0 = const()[name = string("op_11471_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11475 = const()[name = string("op_11475"), val = tensor([1, 1, 1024])]; + tensor var_11471_cast_fp16 = transpose(perm = var_11471_perm_0, x = attn_output_151_cast_fp16)[name = string("transpose_81")]; + tensor attn_output_155_cast_fp16 = reshape(shape = var_11475, x = var_11471_cast_fp16)[name = string("attn_output_155_cast_fp16")]; + tensor var_11480 = const()[name = string("op_11480"), val = tensor([0, 2, 1])]; + string var_11496_pad_type_0 = const()[name = string("op_11496_pad_type_0"), val = string("valid")]; + int32 var_11496_groups_0 = const()[name = string("op_11496_groups_0"), val = int32(1)]; + tensor var_11496_strides_0 = const()[name = string("op_11496_strides_0"), val = tensor([1])]; + tensor var_11496_pad_0 = const()[name = string("op_11496_pad_0"), val = tensor([0, 0])]; + tensor var_11496_dilations_0 = const()[name = string("op_11496_dilations_0"), val = tensor([1])]; + tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678036224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678921024))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11481_cast_fp16 = transpose(perm = var_11480, x = attn_output_155_cast_fp16)[name = string("transpose_80")]; + tensor var_11496_cast_fp16 = conv(dilations = var_11496_dilations_0, groups = var_11496_groups_0, pad = var_11496_pad_0, pad_type = var_11496_pad_type_0, strides = var_11496_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_11481_cast_fp16)[name = string("op_11496_cast_fp16")]; + tensor var_11500 = const()[name = string("op_11500"), val = tensor([0, 2, 1])]; + int32 var_11511 = const()[name = string("op_11511"), val = int32(-1)]; + fp16 const_596_promoted_to_fp16 = const()[name = string("const_596_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_253_cast_fp16 = transpose(perm = var_11500, x = var_11496_cast_fp16)[name = string("transpose_79")]; + tensor var_11513_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = const_596_promoted_to_fp16)[name = string("op_11513_cast_fp16")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311_cast_fp16 = concat(axis = var_11511, interleave = input_311_interleave_0, values = (hidden_states_253_cast_fp16, var_11513_cast_fp16))[name = string("input_311_cast_fp16")]; + tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; + fp16 var_11508_to_fp16 = const()[name = string("op_11508_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_11508_to_fp16, x = input_311_cast_fp16)[name = string("normed_373_cast_fp16")]; + tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_375_cast_fp16 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375_cast_fp16")]; + tensor var_11527_to_fp16 = const()[name = string("op_11527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678957952)))]; + tensor attn_output_159_cast_fp16 = mul(x = normed_375_cast_fp16, y = var_11527_to_fp16)[name = string("attn_output_159_cast_fp16")]; + tensor hidden_states_255_cast_fp16 = add(x = hidden_states_245_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; + int32 var_11540 = const()[name = string("op_11540"), val = int32(-1)]; + fp16 const_600_promoted_to_fp16 = const()[name = string("const_600_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11542_cast_fp16 = mul(x = hidden_states_255_cast_fp16, y = const_600_promoted_to_fp16)[name = string("op_11542_cast_fp16")]; + bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; + tensor input_313_cast_fp16 = concat(axis = var_11540, interleave = input_313_interleave_0, values = (hidden_states_255_cast_fp16, var_11542_cast_fp16))[name = string("input_313_cast_fp16")]; + tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; + fp16 var_11537_to_fp16 = const()[name = string("op_11537_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_11537_to_fp16, x = input_313_cast_fp16)[name = string("normed_377_cast_fp16")]; + tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_379_cast_fp16 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379_cast_fp16")]; + tensor var_11556_to_fp16 = const()[name = string("op_11556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678960320)))]; + tensor x_253_cast_fp16 = mul(x = normed_379_cast_fp16, y = var_11556_to_fp16)[name = string("x_253_cast_fp16")]; + tensor var_11568 = const()[name = string("op_11568"), val = tensor([0, 2, 1])]; + tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; + tensor var_11569_cast_fp16 = transpose(perm = var_11568, x = x_253_cast_fp16)[name = string("transpose_78")]; + tensor input_315_cast_fp16 = expand_dims(axes = input_315_axes_0, x = var_11569_cast_fp16)[name = string("input_315_cast_fp16")]; + string x_255_pad_type_0 = const()[name = string("x_255_pad_type_0"), val = string("valid")]; + tensor x_255_strides_0 = const()[name = string("x_255_strides_0"), val = tensor([1, 1])]; + tensor x_255_pad_0 = const()[name = string("x_255_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_255_dilations_0 = const()[name = string("x_255_dilations_0"), val = tensor([1, 1])]; + int32 x_255_groups_0 = const()[name = string("x_255_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678962688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(684934720))))[name = string("model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_255_cast_fp16 = conv(dilations = x_255_dilations_0, groups = x_255_groups_0, pad = x_255_pad_0, pad_type = x_255_pad_type_0, strides = x_255_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("x_255_cast_fp16")]; + string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; + tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; + tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; + int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685155968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(691128000))))[name = string("model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_31_cast_fp16 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("b_31_cast_fp16")]; + string var_11594_mode_0 = const()[name = string("op_11594_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_11594_cast_fp16 = gelu(mode = var_11594_mode_0, x = x_255_cast_fp16)[name = string("op_11594_cast_fp16")]; + tensor input_317_cast_fp16 = mul(x = var_11594_cast_fp16, y = b_31_cast_fp16)[name = string("input_317_cast_fp16")]; + string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; + tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; + tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; + int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(691349248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697321280))))[name = string("model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_31_cast_fp16 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_317_cast_fp16)[name = string("e_31_cast_fp16")]; + tensor var_11602_axes_0 = const()[name = string("op_11602_axes_0"), val = tensor([2])]; + tensor var_11602_cast_fp16 = squeeze(axes = var_11602_axes_0, x = e_31_cast_fp16)[name = string("op_11602_cast_fp16")]; + tensor var_11603 = const()[name = string("op_11603"), val = tensor([0, 2, 1])]; + int32 var_11614 = const()[name = string("op_11614"), val = int32(-1)]; + fp16 const_604_promoted_to_fp16 = const()[name = string("const_604_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_257_cast_fp16 = transpose(perm = var_11603, x = var_11602_cast_fp16)[name = string("transpose_77")]; + tensor var_11616_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = const_604_promoted_to_fp16)[name = string("op_11616_cast_fp16")]; + bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; + tensor input_319_cast_fp16 = concat(axis = var_11614, interleave = input_319_interleave_0, values = (hidden_states_257_cast_fp16, var_11616_cast_fp16))[name = string("input_319_cast_fp16")]; + tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; + fp16 var_11611_to_fp16 = const()[name = string("op_11611_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_11611_to_fp16, x = input_319_cast_fp16)[name = string("normed_381_cast_fp16")]; + tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; + tensor var_11630_to_fp16 = const()[name = string("op_11630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697358208)))]; + tensor hidden_states_259_cast_fp16 = mul(x = normed_383_cast_fp16, y = var_11630_to_fp16)[name = string("hidden_states_259_cast_fp16")]; + tensor hidden_states_261_cast_fp16 = add(x = hidden_states_255_cast_fp16, y = hidden_states_259_cast_fp16)[name = string("hidden_states_261_cast_fp16")]; + int32 var_11681 = const()[name = string("op_11681"), val = int32(-1)]; + fp16 const_608_promoted_to_fp16 = const()[name = string("const_608_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11683_cast_fp16 = mul(x = hidden_states_261_cast_fp16, y = const_608_promoted_to_fp16)[name = string("op_11683_cast_fp16")]; + bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; + tensor input_321_cast_fp16 = concat(axis = var_11681, interleave = input_321_interleave_0, values = (hidden_states_261_cast_fp16, var_11683_cast_fp16))[name = string("input_321_cast_fp16")]; + tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; + fp16 var_11678_to_fp16 = const()[name = string("op_11678_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_11678_to_fp16, x = input_321_cast_fp16)[name = string("normed_385_cast_fp16")]; + tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; + tensor var_11697_to_fp16 = const()[name = string("op_11697_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697360576)))]; + tensor hidden_states_263_cast_fp16 = mul(x = normed_387_cast_fp16, y = var_11697_to_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor var_11702 = const()[name = string("op_11702"), val = tensor([0, 2, 1])]; + tensor var_11705_axes_0 = const()[name = string("op_11705_axes_0"), val = tensor([2])]; + tensor var_11703_cast_fp16 = transpose(perm = var_11702, x = hidden_states_263_cast_fp16)[name = string("transpose_76")]; + tensor var_11705_cast_fp16 = expand_dims(axes = var_11705_axes_0, x = var_11703_cast_fp16)[name = string("op_11705_cast_fp16")]; + string var_11721_pad_type_0 = const()[name = string("op_11721_pad_type_0"), val = string("valid")]; + tensor var_11721_strides_0 = const()[name = string("op_11721_strides_0"), val = tensor([1, 1])]; + tensor var_11721_pad_0 = const()[name = string("op_11721_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11721_dilations_0 = const()[name = string("op_11721_dilations_0"), val = tensor([1, 1])]; + int32 var_11721_groups_0 = const()[name = string("op_11721_groups_0"), val = int32(1)]; + tensor var_11721 = conv(dilations = var_11721_dilations_0, groups = var_11721_groups_0, pad = var_11721_pad_0, pad_type = var_11721_pad_type_0, strides = var_11721_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_11705_cast_fp16)[name = string("op_11721")]; + tensor var_11726 = const()[name = string("op_11726"), val = tensor([1, 4, 1, 256])]; + tensor var_11727 = reshape(shape = var_11726, x = var_11721)[name = string("op_11727")]; + string var_11743_pad_type_0 = const()[name = string("op_11743_pad_type_0"), val = string("valid")]; + tensor var_11743_strides_0 = const()[name = string("op_11743_strides_0"), val = tensor([1, 1])]; + tensor var_11743_pad_0 = const()[name = string("op_11743_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11743_dilations_0 = const()[name = string("op_11743_dilations_0"), val = tensor([1, 1])]; + int32 var_11743_groups_0 = const()[name = string("op_11743_groups_0"), val = int32(1)]; + tensor var_11743 = conv(dilations = var_11743_dilations_0, groups = var_11743_groups_0, pad = var_11743_pad_0, pad_type = var_11743_pad_type_0, strides = var_11743_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_11705_cast_fp16)[name = string("op_11743")]; + tensor var_11748 = const()[name = string("op_11748"), val = tensor([1, 1, 1, 256])]; + tensor var_11749 = reshape(shape = var_11748, x = var_11743)[name = string("op_11749")]; + string var_11765_pad_type_0 = const()[name = string("op_11765_pad_type_0"), val = string("valid")]; + tensor var_11765_strides_0 = const()[name = string("op_11765_strides_0"), val = tensor([1, 1])]; + tensor var_11765_pad_0 = const()[name = string("op_11765_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11765_dilations_0 = const()[name = string("op_11765_dilations_0"), val = tensor([1, 1])]; + int32 var_11765_groups_0 = const()[name = string("op_11765_groups_0"), val = int32(1)]; + tensor var_11765 = conv(dilations = var_11765_dilations_0, groups = var_11765_groups_0, pad = var_11765_pad_0, pad_type = var_11765_pad_type_0, strides = var_11765_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_11705_cast_fp16)[name = string("op_11765")]; + tensor var_11770 = const()[name = string("op_11770"), val = tensor([1, 1, 1, 256])]; + tensor var_11771 = reshape(shape = var_11770, x = var_11765)[name = string("op_11771")]; + int32 var_11786 = const()[name = string("op_11786"), val = int32(-1)]; + fp16 const_612_promoted = const()[name = string("const_612_promoted"), val = fp16(-0x1p+0)]; + tensor var_11788 = mul(x = var_11727, y = const_612_promoted)[name = string("op_11788")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325 = concat(axis = var_11786, interleave = input_325_interleave_0, values = (var_11727, var_11788))[name = string("input_325")]; + tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; + fp16 var_11783_to_fp16 = const()[name = string("op_11783_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_11783_to_fp16, x = input_325)[name = string("normed_389_cast_fp16")]; + tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; + tensor var_11802_to_fp16 = const()[name = string("op_11802_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697362944)))]; + tensor q_33_cast_fp16 = mul(x = normed_391, y = var_11802_to_fp16)[name = string("q_33_cast_fp16")]; + int32 var_11813 = const()[name = string("op_11813"), val = int32(-1)]; + fp16 const_616_promoted = const()[name = string("const_616_promoted"), val = fp16(-0x1p+0)]; + tensor var_11815 = mul(x = var_11749, y = const_616_promoted)[name = string("op_11815")]; + bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; + tensor input_327 = concat(axis = var_11813, interleave = input_327_interleave_0, values = (var_11749, var_11815))[name = string("input_327")]; + tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; + fp16 var_11810_to_fp16 = const()[name = string("op_11810_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_11810_to_fp16, x = input_327)[name = string("normed_393_cast_fp16")]; + tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; + tensor var_11829_to_fp16 = const()[name = string("op_11829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697363520)))]; + tensor k_33_cast_fp16 = mul(x = normed_395, y = var_11829_to_fp16)[name = string("k_33_cast_fp16")]; + tensor var_11831_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11831_cast_fp16")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; + fp16 const_622_promoted_to_fp16 = const()[name = string("const_622_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11852_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_622_promoted_to_fp16)[name = string("op_11852_cast_fp16")]; + int32 var_11854 = const()[name = string("op_11854"), val = int32(-1)]; + bool var_11855_interleave_0 = const()[name = string("op_11855_interleave_0"), val = bool(false)]; + tensor var_11855_cast_fp16 = concat(axis = var_11854, interleave = var_11855_interleave_0, values = (var_11852_cast_fp16, x1_65_cast_fp16))[name = string("op_11855_cast_fp16")]; + tensor var_11856_cast_fp16 = mul(x = var_11855_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11856_cast_fp16")]; + tensor query_states_65_cast_fp16 = add(x = var_11831_cast_fp16, y = var_11856_cast_fp16)[name = string("query_states_65_cast_fp16")]; + tensor var_11859_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11859_cast_fp16")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; + fp16 const_625_promoted_to_fp16 = const()[name = string("const_625_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11880_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_625_promoted_to_fp16)[name = string("op_11880_cast_fp16")]; + int32 var_11882 = const()[name = string("op_11882"), val = int32(-1)]; + bool var_11883_interleave_0 = const()[name = string("op_11883_interleave_0"), val = bool(false)]; + tensor var_11883_cast_fp16 = concat(axis = var_11882, interleave = var_11883_interleave_0, values = (var_11880_cast_fp16, x1_67_cast_fp16))[name = string("op_11883_cast_fp16")]; + tensor var_11884_cast_fp16 = mul(x = var_11883_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11884_cast_fp16")]; + tensor key_states_65_cast_fp16 = add(x = var_11859_cast_fp16, y = var_11884_cast_fp16)[name = string("key_states_65_cast_fp16")]; + tensor key_slice_29_begin_0 = const()[name = string("key_slice_29_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor key_slice_29_end_0 = const()[name = string("key_slice_29_end_0"), val = tensor([15, 1, 512, 256])]; + tensor key_slice_29_end_mask_0 = const()[name = string("key_slice_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_29_cast_fp16 = slice_by_index(begin = key_slice_29_begin_0, end = key_slice_29_end_0, end_mask = key_slice_29_end_mask_0, x = coreml_update_state_83)[name = string("key_slice_29_cast_fp16")]; + tensor key_tail_29_begin_0 = const()[name = string("key_tail_29_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_29_end_0 = const()[name = string("key_tail_29_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_29_cast_fp16 = slice_by_index(begin = key_tail_29_begin_0, end = key_tail_29_end_0, x = key_slice_29_cast_fp16)[name = string("key_tail_29_cast_fp16")]; + int32 var_11897 = const()[name = string("op_11897"), val = int32(2)]; + bool shifted_key_29_interleave_0 = const()[name = string("shifted_key_29_interleave_0"), val = bool(false)]; + tensor shifted_key_29_cast_fp16 = concat(axis = var_11897, interleave = shifted_key_29_interleave_0, values = (key_tail_29_cast_fp16, key_states_65_cast_fp16))[name = string("shifted_key_29_cast_fp16")]; + tensor concat_72 = const()[name = string("concat_72"), val = tensor([14, 0, 0, 0])]; + tensor concat_73 = const()[name = string("concat_73"), val = tensor([15, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_72, begin_mask = model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0, end = concat_73, end_mask = model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_29_stride_0, update = shifted_key_29_cast_fp16, x = coreml_update_state_83)[name = string("model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_84")]; + tensor value_slice_29_begin_0 = const()[name = string("value_slice_29_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor value_slice_29_end_0 = const()[name = string("value_slice_29_end_0"), val = tensor([37, 1, 512, 256])]; + tensor value_slice_29_end_mask_0 = const()[name = string("value_slice_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_29_cast_fp16 = slice_by_index(begin = value_slice_29_begin_0, end = value_slice_29_end_0, end_mask = value_slice_29_end_mask_0, x = coreml_update_state_84)[name = string("value_slice_29_cast_fp16")]; + tensor value_tail_29_begin_0 = const()[name = string("value_tail_29_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_29_end_0 = const()[name = string("value_tail_29_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_29_cast_fp16 = slice_by_index(begin = value_tail_29_begin_0, end = value_tail_29_end_0, x = value_slice_29_cast_fp16)[name = string("value_tail_29_cast_fp16")]; + int32 var_11931 = const()[name = string("op_11931"), val = int32(2)]; + bool shifted_value_29_interleave_0 = const()[name = string("shifted_value_29_interleave_0"), val = bool(false)]; + tensor shifted_value_29_cast_fp16 = concat(axis = var_11931, interleave = shifted_value_29_interleave_0, values = (value_tail_29_cast_fp16, var_11771))[name = string("shifted_value_29_cast_fp16")]; + tensor concat_74 = const()[name = string("concat_74"), val = tensor([36, 0, 0, 0])]; + tensor concat_75 = const()[name = string("concat_75"), val = tensor([37, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_30_stride_0, update = shifted_value_29_cast_fp16, x = coreml_update_state_84)[name = string("model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_85")]; + tensor var_11959_begin_0 = const()[name = string("op_11959_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor var_11959_end_0 = const()[name = string("op_11959_end_0"), val = tensor([15, 1, 512, 256])]; + tensor var_11959_end_mask_0 = const()[name = string("op_11959_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11959_cast_fp16 = slice_by_index(begin = var_11959_begin_0, end = var_11959_end_0, end_mask = var_11959_end_mask_0, x = coreml_update_state_85)[name = string("op_11959_cast_fp16")]; + tensor var_11966_begin_0 = const()[name = string("op_11966_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor var_11966_end_0 = const()[name = string("op_11966_end_0"), val = tensor([37, 1, 512, 256])]; + tensor var_11966_end_mask_0 = const()[name = string("op_11966_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11966_cast_fp16 = slice_by_index(begin = var_11966_begin_0, end = var_11966_end_0, end_mask = var_11966_end_mask_0, x = coreml_update_state_85)[name = string("op_11966_cast_fp16")]; + tensor var_12003 = const()[name = string("op_12003"), val = tensor([1, 4, 1, 1])]; + tensor x_261_cast_fp16 = tile(reps = var_12003, x = var_11959_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_12023 = const()[name = string("op_12023"), val = tensor([1, 4, 1, 1])]; + tensor x_267_cast_fp16 = tile(reps = var_12023, x = var_11966_cast_fp16)[name = string("x_267_cast_fp16")]; + bool var_12050_transpose_x_1 = const()[name = string("op_12050_transpose_x_1"), val = bool(false)]; + bool var_12050_transpose_y_1 = const()[name = string("op_12050_transpose_y_1"), val = bool(true)]; + tensor var_12050 = matmul(transpose_x = var_12050_transpose_x_1, transpose_y = var_12050_transpose_y_1, x = query_states_65_cast_fp16, y = x_261_cast_fp16)[name = string("op_12050")]; + fp16 var_12051_to_fp16 = const()[name = string("op_12051_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_97_cast_fp16 = mul(x = var_12050, y = var_12051_to_fp16)[name = string("attn_weights_97_cast_fp16")]; + tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = var_2105)[name = string("attn_weights_99_cast_fp16")]; + int32 var_12086 = const()[name = string("op_12086"), val = int32(-1)]; + tensor attn_weights_101_cast_fp16 = softmax(axis = var_12086, x = attn_weights_99_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; + bool attn_output_161_transpose_x_0 = const()[name = string("attn_output_161_transpose_x_0"), val = bool(false)]; + bool attn_output_161_transpose_y_0 = const()[name = string("attn_output_161_transpose_y_0"), val = bool(false)]; + tensor attn_output_161_cast_fp16 = matmul(transpose_x = attn_output_161_transpose_x_0, transpose_y = attn_output_161_transpose_y_0, x = attn_weights_101_cast_fp16, y = x_267_cast_fp16)[name = string("attn_output_161_cast_fp16")]; + tensor var_12097_perm_0 = const()[name = string("op_12097_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12101 = const()[name = string("op_12101"), val = tensor([1, 1, 1024])]; + tensor var_12097_cast_fp16 = transpose(perm = var_12097_perm_0, x = attn_output_161_cast_fp16)[name = string("transpose_75")]; + tensor attn_output_165_cast_fp16 = reshape(shape = var_12101, x = var_12097_cast_fp16)[name = string("attn_output_165_cast_fp16")]; + tensor var_12106 = const()[name = string("op_12106"), val = tensor([0, 2, 1])]; + string var_12122_pad_type_0 = const()[name = string("op_12122_pad_type_0"), val = string("valid")]; + int32 var_12122_groups_0 = const()[name = string("op_12122_groups_0"), val = int32(1)]; + tensor var_12122_strides_0 = const()[name = string("op_12122_strides_0"), val = tensor([1])]; + tensor var_12122_pad_0 = const()[name = string("op_12122_pad_0"), val = tensor([0, 0])]; + tensor var_12122_dilations_0 = const()[name = string("op_12122_dilations_0"), val = tensor([1])]; + tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697364096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698248896))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12107_cast_fp16 = transpose(perm = var_12106, x = attn_output_165_cast_fp16)[name = string("transpose_74")]; + tensor var_12122_cast_fp16 = conv(dilations = var_12122_dilations_0, groups = var_12122_groups_0, pad = var_12122_pad_0, pad_type = var_12122_pad_type_0, strides = var_12122_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_12107_cast_fp16)[name = string("op_12122_cast_fp16")]; + tensor var_12126 = const()[name = string("op_12126"), val = tensor([0, 2, 1])]; + int32 var_12137 = const()[name = string("op_12137"), val = int32(-1)]; + fp16 const_634_promoted_to_fp16 = const()[name = string("const_634_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_269_cast_fp16 = transpose(perm = var_12126, x = var_12122_cast_fp16)[name = string("transpose_73")]; + tensor var_12139_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_634_promoted_to_fp16)[name = string("op_12139_cast_fp16")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331_cast_fp16 = concat(axis = var_12137, interleave = input_331_interleave_0, values = (hidden_states_269_cast_fp16, var_12139_cast_fp16))[name = string("input_331_cast_fp16")]; + tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; + fp16 var_12134_to_fp16 = const()[name = string("op_12134_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_12134_to_fp16, x = input_331_cast_fp16)[name = string("normed_397_cast_fp16")]; + tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; + tensor var_12153_to_fp16 = const()[name = string("op_12153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698285824)))]; + tensor attn_output_169_cast_fp16 = mul(x = normed_399_cast_fp16, y = var_12153_to_fp16)[name = string("attn_output_169_cast_fp16")]; + tensor hidden_states_271_cast_fp16 = add(x = hidden_states_261_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_271_cast_fp16")]; + int32 var_12166 = const()[name = string("op_12166"), val = int32(-1)]; + fp16 const_638_promoted_to_fp16 = const()[name = string("const_638_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12168_cast_fp16 = mul(x = hidden_states_271_cast_fp16, y = const_638_promoted_to_fp16)[name = string("op_12168_cast_fp16")]; + bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; + tensor input_333_cast_fp16 = concat(axis = var_12166, interleave = input_333_interleave_0, values = (hidden_states_271_cast_fp16, var_12168_cast_fp16))[name = string("input_333_cast_fp16")]; + tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; + fp16 var_12163_to_fp16 = const()[name = string("op_12163_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_12163_to_fp16, x = input_333_cast_fp16)[name = string("normed_401_cast_fp16")]; + tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; + tensor var_12182_to_fp16 = const()[name = string("op_12182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698288192)))]; + tensor x_269_cast_fp16 = mul(x = normed_403_cast_fp16, y = var_12182_to_fp16)[name = string("x_269_cast_fp16")]; + tensor var_12194 = const()[name = string("op_12194"), val = tensor([0, 2, 1])]; + tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; + tensor var_12195_cast_fp16 = transpose(perm = var_12194, x = x_269_cast_fp16)[name = string("transpose_72")]; + tensor input_335_cast_fp16 = expand_dims(axes = input_335_axes_0, x = var_12195_cast_fp16)[name = string("input_335_cast_fp16")]; + string x_271_pad_type_0 = const()[name = string("x_271_pad_type_0"), val = string("valid")]; + tensor x_271_strides_0 = const()[name = string("x_271_strides_0"), val = tensor([1, 1])]; + tensor x_271_pad_0 = const()[name = string("x_271_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_271_dilations_0 = const()[name = string("x_271_dilations_0"), val = tensor([1, 1])]; + int32 x_271_groups_0 = const()[name = string("x_271_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698290560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704262592))))[name = string("model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_271_cast_fp16 = conv(dilations = x_271_dilations_0, groups = x_271_groups_0, pad = x_271_pad_0, pad_type = x_271_pad_type_0, strides = x_271_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("x_271_cast_fp16")]; + string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; + tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; + tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; + int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704483840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710455872))))[name = string("model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_33_cast_fp16 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("b_33_cast_fp16")]; + string var_12220_mode_0 = const()[name = string("op_12220_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_12220_cast_fp16 = gelu(mode = var_12220_mode_0, x = x_271_cast_fp16)[name = string("op_12220_cast_fp16")]; + tensor input_337_cast_fp16 = mul(x = var_12220_cast_fp16, y = b_33_cast_fp16)[name = string("input_337_cast_fp16")]; + string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; + tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; + tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; + int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710677120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716649152))))[name = string("model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_33_cast_fp16 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_337_cast_fp16)[name = string("e_33_cast_fp16")]; + tensor var_12228_axes_0 = const()[name = string("op_12228_axes_0"), val = tensor([2])]; + tensor var_12228_cast_fp16 = squeeze(axes = var_12228_axes_0, x = e_33_cast_fp16)[name = string("op_12228_cast_fp16")]; + tensor var_12229 = const()[name = string("op_12229"), val = tensor([0, 2, 1])]; + int32 var_12240 = const()[name = string("op_12240"), val = int32(-1)]; + fp16 const_642_promoted_to_fp16 = const()[name = string("const_642_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_273_cast_fp16 = transpose(perm = var_12229, x = var_12228_cast_fp16)[name = string("transpose_71")]; + tensor var_12242_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = const_642_promoted_to_fp16)[name = string("op_12242_cast_fp16")]; + bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; + tensor input_339_cast_fp16 = concat(axis = var_12240, interleave = input_339_interleave_0, values = (hidden_states_273_cast_fp16, var_12242_cast_fp16))[name = string("input_339_cast_fp16")]; + tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; + fp16 var_12237_to_fp16 = const()[name = string("op_12237_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_12237_to_fp16, x = input_339_cast_fp16)[name = string("normed_405_cast_fp16")]; + tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_407_cast_fp16 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407_cast_fp16")]; + tensor var_12256_to_fp16 = const()[name = string("op_12256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716686080)))]; + tensor hidden_states_275_cast_fp16 = mul(x = normed_407_cast_fp16, y = var_12256_to_fp16)[name = string("hidden_states_275_cast_fp16")]; + tensor hidden_states_277_cast_fp16 = add(x = hidden_states_271_cast_fp16, y = hidden_states_275_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; + int32 var_12307 = const()[name = string("op_12307"), val = int32(-1)]; + fp16 const_646_promoted_to_fp16 = const()[name = string("const_646_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12309_cast_fp16 = mul(x = hidden_states_277_cast_fp16, y = const_646_promoted_to_fp16)[name = string("op_12309_cast_fp16")]; + bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; + tensor input_341_cast_fp16 = concat(axis = var_12307, interleave = input_341_interleave_0, values = (hidden_states_277_cast_fp16, var_12309_cast_fp16))[name = string("input_341_cast_fp16")]; + tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; + fp16 var_12304_to_fp16 = const()[name = string("op_12304_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_12304_to_fp16, x = input_341_cast_fp16)[name = string("normed_409_cast_fp16")]; + tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_411_cast_fp16 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411_cast_fp16")]; + tensor var_12323_to_fp16 = const()[name = string("op_12323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716688448)))]; + tensor hidden_states_279_cast_fp16 = mul(x = normed_411_cast_fp16, y = var_12323_to_fp16)[name = string("hidden_states_279_cast_fp16")]; + tensor var_12328 = const()[name = string("op_12328"), val = tensor([0, 2, 1])]; + tensor var_12331_axes_0 = const()[name = string("op_12331_axes_0"), val = tensor([2])]; + tensor var_12329_cast_fp16 = transpose(perm = var_12328, x = hidden_states_279_cast_fp16)[name = string("transpose_70")]; + tensor var_12331_cast_fp16 = expand_dims(axes = var_12331_axes_0, x = var_12329_cast_fp16)[name = string("op_12331_cast_fp16")]; + string var_12347_pad_type_0 = const()[name = string("op_12347_pad_type_0"), val = string("valid")]; + tensor var_12347_strides_0 = const()[name = string("op_12347_strides_0"), val = tensor([1, 1])]; + tensor var_12347_pad_0 = const()[name = string("op_12347_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12347_dilations_0 = const()[name = string("op_12347_dilations_0"), val = tensor([1, 1])]; + int32 var_12347_groups_0 = const()[name = string("op_12347_groups_0"), val = int32(1)]; + tensor var_12347 = conv(dilations = var_12347_dilations_0, groups = var_12347_groups_0, pad = var_12347_pad_0, pad_type = var_12347_pad_type_0, strides = var_12347_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_12331_cast_fp16)[name = string("op_12347")]; + tensor var_12352 = const()[name = string("op_12352"), val = tensor([1, 4, 1, 256])]; + tensor var_12353 = reshape(shape = var_12352, x = var_12347)[name = string("op_12353")]; + string var_12369_pad_type_0 = const()[name = string("op_12369_pad_type_0"), val = string("valid")]; + tensor var_12369_strides_0 = const()[name = string("op_12369_strides_0"), val = tensor([1, 1])]; + tensor var_12369_pad_0 = const()[name = string("op_12369_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12369_dilations_0 = const()[name = string("op_12369_dilations_0"), val = tensor([1, 1])]; + int32 var_12369_groups_0 = const()[name = string("op_12369_groups_0"), val = int32(1)]; + tensor var_12369 = conv(dilations = var_12369_dilations_0, groups = var_12369_groups_0, pad = var_12369_pad_0, pad_type = var_12369_pad_type_0, strides = var_12369_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_12331_cast_fp16)[name = string("op_12369")]; + tensor var_12374 = const()[name = string("op_12374"), val = tensor([1, 1, 1, 256])]; + tensor var_12375 = reshape(shape = var_12374, x = var_12369)[name = string("op_12375")]; + string var_12391_pad_type_0 = const()[name = string("op_12391_pad_type_0"), val = string("valid")]; + tensor var_12391_strides_0 = const()[name = string("op_12391_strides_0"), val = tensor([1, 1])]; + tensor var_12391_pad_0 = const()[name = string("op_12391_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12391_dilations_0 = const()[name = string("op_12391_dilations_0"), val = tensor([1, 1])]; + int32 var_12391_groups_0 = const()[name = string("op_12391_groups_0"), val = int32(1)]; + tensor var_12391 = conv(dilations = var_12391_dilations_0, groups = var_12391_groups_0, pad = var_12391_pad_0, pad_type = var_12391_pad_type_0, strides = var_12391_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_12331_cast_fp16)[name = string("op_12391")]; + tensor var_12396 = const()[name = string("op_12396"), val = tensor([1, 1, 1, 256])]; + tensor var_12397 = reshape(shape = var_12396, x = var_12391)[name = string("op_12397")]; + int32 var_12412 = const()[name = string("op_12412"), val = int32(-1)]; + fp16 const_650_promoted = const()[name = string("const_650_promoted"), val = fp16(-0x1p+0)]; + tensor var_12414 = mul(x = var_12353, y = const_650_promoted)[name = string("op_12414")]; + bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; + tensor input_345 = concat(axis = var_12412, interleave = input_345_interleave_0, values = (var_12353, var_12414))[name = string("input_345")]; + tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; + fp16 var_12409_to_fp16 = const()[name = string("op_12409_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_12409_to_fp16, x = input_345)[name = string("normed_413_cast_fp16")]; + tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_415 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415")]; + tensor var_12428_to_fp16 = const()[name = string("op_12428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716690816)))]; + tensor q_35_cast_fp16 = mul(x = normed_415, y = var_12428_to_fp16)[name = string("q_35_cast_fp16")]; + int32 var_12439 = const()[name = string("op_12439"), val = int32(-1)]; + fp16 const_654_promoted = const()[name = string("const_654_promoted"), val = fp16(-0x1p+0)]; + tensor var_12441 = mul(x = var_12375, y = const_654_promoted)[name = string("op_12441")]; + bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; + tensor input_347 = concat(axis = var_12439, interleave = input_347_interleave_0, values = (var_12375, var_12441))[name = string("input_347")]; + tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; + fp16 var_12436_to_fp16 = const()[name = string("op_12436_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_12436_to_fp16, x = input_347)[name = string("normed_417_cast_fp16")]; + tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_419 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419")]; + tensor var_12455_to_fp16 = const()[name = string("op_12455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716691392)))]; + tensor k_35_cast_fp16 = mul(x = normed_419, y = var_12455_to_fp16)[name = string("k_35_cast_fp16")]; + tensor var_12457_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_21_cast_fp16)[name = string("op_12457_cast_fp16")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; + fp16 const_660_promoted_to_fp16 = const()[name = string("const_660_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12478_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_660_promoted_to_fp16)[name = string("op_12478_cast_fp16")]; + int32 var_12480 = const()[name = string("op_12480"), val = int32(-1)]; + bool var_12481_interleave_0 = const()[name = string("op_12481_interleave_0"), val = bool(false)]; + tensor var_12481_cast_fp16 = concat(axis = var_12480, interleave = var_12481_interleave_0, values = (var_12478_cast_fp16, x1_69_cast_fp16))[name = string("op_12481_cast_fp16")]; + tensor var_12482_cast_fp16 = mul(x = var_12481_cast_fp16, y = sin_21_cast_fp16)[name = string("op_12482_cast_fp16")]; + tensor query_states_69_cast_fp16 = add(x = var_12457_cast_fp16, y = var_12482_cast_fp16)[name = string("query_states_69_cast_fp16")]; + tensor var_12485_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_21_cast_fp16)[name = string("op_12485_cast_fp16")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; + fp16 const_663_promoted_to_fp16 = const()[name = string("const_663_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12506_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_663_promoted_to_fp16)[name = string("op_12506_cast_fp16")]; + int32 var_12508 = const()[name = string("op_12508"), val = int32(-1)]; + bool var_12509_interleave_0 = const()[name = string("op_12509_interleave_0"), val = bool(false)]; + tensor var_12509_cast_fp16 = concat(axis = var_12508, interleave = var_12509_interleave_0, values = (var_12506_cast_fp16, x1_71_cast_fp16))[name = string("op_12509_cast_fp16")]; + tensor var_12510_cast_fp16 = mul(x = var_12509_cast_fp16, y = sin_21_cast_fp16)[name = string("op_12510_cast_fp16")]; + tensor key_states_69_cast_fp16 = add(x = var_12485_cast_fp16, y = var_12510_cast_fp16)[name = string("key_states_69_cast_fp16")]; + tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([2])]; + tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; + tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([3])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_178, concat_79_values1_0, var_5043, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_5_stride_0, update = key_states_69_cast_fp16, x = coreml_update_state_75)[name = string("model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_86")]; + tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([6])]; + tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; + tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; + tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([7])]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_82")]; + tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; + tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; + int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; + bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; + tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_184, concat_83_values1_0, var_5043, concat_83_values3_0))[name = string("concat_83")]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_6_stride_0, update = var_12397, x = coreml_update_state_86)[name = string("model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_87")]; + tensor var_12565_begin_0 = const()[name = string("op_12565_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_12565_end_0 = const()[name = string("op_12565_end_0"), val = tensor([3, 1, 4096, 256])]; + tensor var_12565_end_mask_0 = const()[name = string("op_12565_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12565_cast_fp16 = slice_by_index(begin = var_12565_begin_0, end = var_12565_end_0, end_mask = var_12565_end_mask_0, x = coreml_update_state_87)[name = string("op_12565_cast_fp16")]; + tensor var_12572_begin_0 = const()[name = string("op_12572_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_12572_end_0 = const()[name = string("op_12572_end_0"), val = tensor([7, 1, 4096, 256])]; + tensor var_12572_end_mask_0 = const()[name = string("op_12572_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12572_cast_fp16 = slice_by_index(begin = var_12572_begin_0, end = var_12572_end_0, end_mask = var_12572_end_mask_0, x = coreml_update_state_87)[name = string("op_12572_cast_fp16")]; + tensor var_12609 = const()[name = string("op_12609"), val = tensor([1, 4, 1, 1])]; + tensor x_277_cast_fp16 = tile(reps = var_12609, x = var_12565_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_12629 = const()[name = string("op_12629"), val = tensor([1, 4, 1, 1])]; + tensor x_283_cast_fp16 = tile(reps = var_12629, x = var_12572_cast_fp16)[name = string("x_283_cast_fp16")]; + bool var_12656_transpose_x_1 = const()[name = string("op_12656_transpose_x_1"), val = bool(false)]; + bool var_12656_transpose_y_1 = const()[name = string("op_12656_transpose_y_1"), val = bool(true)]; + tensor var_12656 = matmul(transpose_x = var_12656_transpose_x_1, transpose_y = var_12656_transpose_y_1, x = query_states_69_cast_fp16, y = x_277_cast_fp16)[name = string("op_12656")]; + fp16 var_12657_to_fp16 = const()[name = string("op_12657_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_103_cast_fp16 = mul(x = var_12656, y = var_12657_to_fp16)[name = string("attn_weights_103_cast_fp16")]; + tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; + int32 var_12692 = const()[name = string("op_12692"), val = int32(-1)]; + tensor attn_weights_107_cast_fp16 = softmax(axis = var_12692, x = attn_weights_105_cast_fp16)[name = string("attn_weights_107_cast_fp16")]; + bool attn_output_171_transpose_x_0 = const()[name = string("attn_output_171_transpose_x_0"), val = bool(false)]; + bool attn_output_171_transpose_y_0 = const()[name = string("attn_output_171_transpose_y_0"), val = bool(false)]; + tensor attn_output_171_cast_fp16 = matmul(transpose_x = attn_output_171_transpose_x_0, transpose_y = attn_output_171_transpose_y_0, x = attn_weights_107_cast_fp16, y = x_283_cast_fp16)[name = string("attn_output_171_cast_fp16")]; + tensor var_12703_perm_0 = const()[name = string("op_12703_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12707 = const()[name = string("op_12707"), val = tensor([1, 1, 1024])]; + tensor var_12703_cast_fp16 = transpose(perm = var_12703_perm_0, x = attn_output_171_cast_fp16)[name = string("transpose_69")]; + tensor attn_output_175_cast_fp16 = reshape(shape = var_12707, x = var_12703_cast_fp16)[name = string("attn_output_175_cast_fp16")]; + tensor var_12712 = const()[name = string("op_12712"), val = tensor([0, 2, 1])]; + string var_12728_pad_type_0 = const()[name = string("op_12728_pad_type_0"), val = string("valid")]; + int32 var_12728_groups_0 = const()[name = string("op_12728_groups_0"), val = int32(1)]; + tensor var_12728_strides_0 = const()[name = string("op_12728_strides_0"), val = tensor([1])]; + tensor var_12728_pad_0 = const()[name = string("op_12728_pad_0"), val = tensor([0, 0])]; + tensor var_12728_dilations_0 = const()[name = string("op_12728_dilations_0"), val = tensor([1])]; + tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716691968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717576768))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12713_cast_fp16 = transpose(perm = var_12712, x = attn_output_175_cast_fp16)[name = string("transpose_68")]; + tensor var_12728_cast_fp16 = conv(dilations = var_12728_dilations_0, groups = var_12728_groups_0, pad = var_12728_pad_0, pad_type = var_12728_pad_type_0, strides = var_12728_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_12713_cast_fp16)[name = string("op_12728_cast_fp16")]; + tensor var_12732 = const()[name = string("op_12732"), val = tensor([0, 2, 1])]; + int32 var_12743 = const()[name = string("op_12743"), val = int32(-1)]; + fp16 const_672_promoted_to_fp16 = const()[name = string("const_672_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_285_cast_fp16 = transpose(perm = var_12732, x = var_12728_cast_fp16)[name = string("transpose_67")]; + tensor var_12745_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = const_672_promoted_to_fp16)[name = string("op_12745_cast_fp16")]; + bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; + tensor input_351_cast_fp16 = concat(axis = var_12743, interleave = input_351_interleave_0, values = (hidden_states_285_cast_fp16, var_12745_cast_fp16))[name = string("input_351_cast_fp16")]; + tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; + fp16 var_12740_to_fp16 = const()[name = string("op_12740_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_12740_to_fp16, x = input_351_cast_fp16)[name = string("normed_421_cast_fp16")]; + tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_423_cast_fp16 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423_cast_fp16")]; + tensor var_12759_to_fp16 = const()[name = string("op_12759_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717613696)))]; + tensor attn_output_179_cast_fp16 = mul(x = normed_423_cast_fp16, y = var_12759_to_fp16)[name = string("attn_output_179_cast_fp16")]; + tensor hidden_states_287_cast_fp16 = add(x = hidden_states_277_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_287_cast_fp16")]; + int32 var_12772 = const()[name = string("op_12772"), val = int32(-1)]; + fp16 const_676_promoted_to_fp16 = const()[name = string("const_676_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12774_cast_fp16 = mul(x = hidden_states_287_cast_fp16, y = const_676_promoted_to_fp16)[name = string("op_12774_cast_fp16")]; + bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; + tensor input_353_cast_fp16 = concat(axis = var_12772, interleave = input_353_interleave_0, values = (hidden_states_287_cast_fp16, var_12774_cast_fp16))[name = string("input_353_cast_fp16")]; + tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; + fp16 var_12769_to_fp16 = const()[name = string("op_12769_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_12769_to_fp16, x = input_353_cast_fp16)[name = string("normed_425_cast_fp16")]; + tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_427_cast_fp16 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427_cast_fp16")]; + tensor var_12788_to_fp16 = const()[name = string("op_12788_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717616064)))]; + tensor x_285_cast_fp16 = mul(x = normed_427_cast_fp16, y = var_12788_to_fp16)[name = string("x_285_cast_fp16")]; + tensor var_12800 = const()[name = string("op_12800"), val = tensor([0, 2, 1])]; + tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; + tensor var_12801_cast_fp16 = transpose(perm = var_12800, x = x_285_cast_fp16)[name = string("transpose_66")]; + tensor input_355_cast_fp16 = expand_dims(axes = input_355_axes_0, x = var_12801_cast_fp16)[name = string("input_355_cast_fp16")]; + string x_287_pad_type_0 = const()[name = string("x_287_pad_type_0"), val = string("valid")]; + tensor x_287_strides_0 = const()[name = string("x_287_strides_0"), val = tensor([1, 1])]; + tensor x_287_pad_0 = const()[name = string("x_287_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_287_dilations_0 = const()[name = string("x_287_dilations_0"), val = tensor([1, 1])]; + int32 x_287_groups_0 = const()[name = string("x_287_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717618432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723590464))))[name = string("model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_287_cast_fp16 = conv(dilations = x_287_dilations_0, groups = x_287_groups_0, pad = x_287_pad_0, pad_type = x_287_pad_type_0, strides = x_287_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("x_287_cast_fp16")]; + string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; + tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; + tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; + int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723811712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729783744))))[name = string("model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_35_cast_fp16 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("b_35_cast_fp16")]; + string var_12826_mode_0 = const()[name = string("op_12826_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_12826_cast_fp16 = gelu(mode = var_12826_mode_0, x = x_287_cast_fp16)[name = string("op_12826_cast_fp16")]; + tensor input_357_cast_fp16 = mul(x = var_12826_cast_fp16, y = b_35_cast_fp16)[name = string("input_357_cast_fp16")]; + string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; + tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; + tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; + int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730004992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735977024))))[name = string("model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_35_cast_fp16 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_357_cast_fp16)[name = string("e_35_cast_fp16")]; + tensor var_12834_axes_0 = const()[name = string("op_12834_axes_0"), val = tensor([2])]; + tensor var_12834_cast_fp16 = squeeze(axes = var_12834_axes_0, x = e_35_cast_fp16)[name = string("op_12834_cast_fp16")]; + tensor var_12835 = const()[name = string("op_12835"), val = tensor([0, 2, 1])]; + int32 var_12846 = const()[name = string("op_12846"), val = int32(-1)]; + fp16 const_680_promoted_to_fp16 = const()[name = string("const_680_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_289_cast_fp16 = transpose(perm = var_12835, x = var_12834_cast_fp16)[name = string("transpose_65")]; + tensor var_12848_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = const_680_promoted_to_fp16)[name = string("op_12848_cast_fp16")]; + bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; + tensor input_359_cast_fp16 = concat(axis = var_12846, interleave = input_359_interleave_0, values = (hidden_states_289_cast_fp16, var_12848_cast_fp16))[name = string("input_359_cast_fp16")]; + tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; + fp16 var_12843_to_fp16 = const()[name = string("op_12843_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_12843_to_fp16, x = input_359_cast_fp16)[name = string("normed_429_cast_fp16")]; + tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; + tensor var_12862_to_fp16 = const()[name = string("op_12862_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736013952)))]; + tensor hidden_states_291_cast_fp16 = mul(x = normed_431_cast_fp16, y = var_12862_to_fp16)[name = string("hidden_states_291_cast_fp16")]; + tensor hidden_states_293_cast_fp16 = add(x = hidden_states_287_cast_fp16, y = hidden_states_291_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + int32 var_12913 = const()[name = string("op_12913"), val = int32(-1)]; + fp16 const_684_promoted_to_fp16 = const()[name = string("const_684_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12915_cast_fp16 = mul(x = hidden_states_293_cast_fp16, y = const_684_promoted_to_fp16)[name = string("op_12915_cast_fp16")]; + bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; + tensor input_361_cast_fp16 = concat(axis = var_12913, interleave = input_361_interleave_0, values = (hidden_states_293_cast_fp16, var_12915_cast_fp16))[name = string("input_361_cast_fp16")]; + tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; + fp16 var_12910_to_fp16 = const()[name = string("op_12910_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_12910_to_fp16, x = input_361_cast_fp16)[name = string("normed_433_cast_fp16")]; + tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; + tensor var_12929_to_fp16 = const()[name = string("op_12929_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736016320)))]; + tensor hidden_states_295_cast_fp16 = mul(x = normed_435_cast_fp16, y = var_12929_to_fp16)[name = string("hidden_states_295_cast_fp16")]; + tensor var_12934 = const()[name = string("op_12934"), val = tensor([0, 2, 1])]; + tensor var_12937_axes_0 = const()[name = string("op_12937_axes_0"), val = tensor([2])]; + tensor var_12935_cast_fp16 = transpose(perm = var_12934, x = hidden_states_295_cast_fp16)[name = string("transpose_64")]; + tensor var_12937_cast_fp16 = expand_dims(axes = var_12937_axes_0, x = var_12935_cast_fp16)[name = string("op_12937_cast_fp16")]; + string var_12953_pad_type_0 = const()[name = string("op_12953_pad_type_0"), val = string("valid")]; + tensor var_12953_strides_0 = const()[name = string("op_12953_strides_0"), val = tensor([1, 1])]; + tensor var_12953_pad_0 = const()[name = string("op_12953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12953_dilations_0 = const()[name = string("op_12953_dilations_0"), val = tensor([1, 1])]; + int32 var_12953_groups_0 = const()[name = string("op_12953_groups_0"), val = int32(1)]; + tensor var_12953 = conv(dilations = var_12953_dilations_0, groups = var_12953_groups_0, pad = var_12953_pad_0, pad_type = var_12953_pad_type_0, strides = var_12953_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_12937_cast_fp16)[name = string("op_12953")]; + tensor var_12958 = const()[name = string("op_12958"), val = tensor([1, 4, 1, 256])]; + tensor var_12959 = reshape(shape = var_12958, x = var_12953)[name = string("op_12959")]; + string var_12975_pad_type_0 = const()[name = string("op_12975_pad_type_0"), val = string("valid")]; + tensor var_12975_strides_0 = const()[name = string("op_12975_strides_0"), val = tensor([1, 1])]; + tensor var_12975_pad_0 = const()[name = string("op_12975_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12975_dilations_0 = const()[name = string("op_12975_dilations_0"), val = tensor([1, 1])]; + int32 var_12975_groups_0 = const()[name = string("op_12975_groups_0"), val = int32(1)]; + tensor var_12975 = conv(dilations = var_12975_dilations_0, groups = var_12975_groups_0, pad = var_12975_pad_0, pad_type = var_12975_pad_type_0, strides = var_12975_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_12937_cast_fp16)[name = string("op_12975")]; + tensor var_12980 = const()[name = string("op_12980"), val = tensor([1, 1, 1, 256])]; + tensor var_12981 = reshape(shape = var_12980, x = var_12975)[name = string("op_12981")]; + string var_12997_pad_type_0 = const()[name = string("op_12997_pad_type_0"), val = string("valid")]; + tensor var_12997_strides_0 = const()[name = string("op_12997_strides_0"), val = tensor([1, 1])]; + tensor var_12997_pad_0 = const()[name = string("op_12997_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12997_dilations_0 = const()[name = string("op_12997_dilations_0"), val = tensor([1, 1])]; + int32 var_12997_groups_0 = const()[name = string("op_12997_groups_0"), val = int32(1)]; + tensor var_12997 = conv(dilations = var_12997_dilations_0, groups = var_12997_groups_0, pad = var_12997_pad_0, pad_type = var_12997_pad_type_0, strides = var_12997_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_12937_cast_fp16)[name = string("op_12997")]; + tensor var_13002 = const()[name = string("op_13002"), val = tensor([1, 1, 1, 256])]; + tensor var_13003 = reshape(shape = var_13002, x = var_12997)[name = string("op_13003")]; + int32 var_13018 = const()[name = string("op_13018"), val = int32(-1)]; + fp16 const_688_promoted = const()[name = string("const_688_promoted"), val = fp16(-0x1p+0)]; + tensor var_13020 = mul(x = var_12959, y = const_688_promoted)[name = string("op_13020")]; + bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; + tensor input_365 = concat(axis = var_13018, interleave = input_365_interleave_0, values = (var_12959, var_13020))[name = string("input_365")]; + tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; + fp16 var_13015_to_fp16 = const()[name = string("op_13015_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_13015_to_fp16, x = input_365)[name = string("normed_437_cast_fp16")]; + tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; + tensor var_13034_to_fp16 = const()[name = string("op_13034_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736018688)))]; + tensor q_37_cast_fp16 = mul(x = normed_439, y = var_13034_to_fp16)[name = string("q_37_cast_fp16")]; + int32 var_13045 = const()[name = string("op_13045"), val = int32(-1)]; + fp16 const_692_promoted = const()[name = string("const_692_promoted"), val = fp16(-0x1p+0)]; + tensor var_13047 = mul(x = var_12981, y = const_692_promoted)[name = string("op_13047")]; + bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; + tensor input_367 = concat(axis = var_13045, interleave = input_367_interleave_0, values = (var_12981, var_13047))[name = string("input_367")]; + tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; + fp16 var_13042_to_fp16 = const()[name = string("op_13042_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_13042_to_fp16, x = input_367)[name = string("normed_441_cast_fp16")]; + tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; + tensor var_13061_to_fp16 = const()[name = string("op_13061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736019264)))]; + tensor k_37_cast_fp16 = mul(x = normed_443, y = var_13061_to_fp16)[name = string("k_37_cast_fp16")]; + tensor var_13063_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13063_cast_fp16")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; + fp16 const_698_promoted_to_fp16 = const()[name = string("const_698_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13084_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_698_promoted_to_fp16)[name = string("op_13084_cast_fp16")]; + int32 var_13086 = const()[name = string("op_13086"), val = int32(-1)]; + bool var_13087_interleave_0 = const()[name = string("op_13087_interleave_0"), val = bool(false)]; + tensor var_13087_cast_fp16 = concat(axis = var_13086, interleave = var_13087_interleave_0, values = (var_13084_cast_fp16, x1_73_cast_fp16))[name = string("op_13087_cast_fp16")]; + tensor var_13088_cast_fp16 = mul(x = var_13087_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13088_cast_fp16")]; + tensor query_states_73_cast_fp16 = add(x = var_13063_cast_fp16, y = var_13088_cast_fp16)[name = string("query_states_73_cast_fp16")]; + tensor var_13091_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13091_cast_fp16")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; + fp16 const_701_promoted_to_fp16 = const()[name = string("const_701_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13112_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_701_promoted_to_fp16)[name = string("op_13112_cast_fp16")]; + int32 var_13114 = const()[name = string("op_13114"), val = int32(-1)]; + bool var_13115_interleave_0 = const()[name = string("op_13115_interleave_0"), val = bool(false)]; + tensor var_13115_cast_fp16 = concat(axis = var_13114, interleave = var_13115_interleave_0, values = (var_13112_cast_fp16, x1_75_cast_fp16))[name = string("op_13115_cast_fp16")]; + tensor var_13116_cast_fp16 = mul(x = var_13115_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13116_cast_fp16")]; + tensor key_states_73_cast_fp16 = add(x = var_13091_cast_fp16, y = var_13116_cast_fp16)[name = string("key_states_73_cast_fp16")]; + tensor key_slice_31_begin_0 = const()[name = string("key_slice_31_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor key_slice_31_end_0 = const()[name = string("key_slice_31_end_0"), val = tensor([16, 1, 512, 256])]; + tensor key_slice_31_end_mask_0 = const()[name = string("key_slice_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_31_cast_fp16 = slice_by_index(begin = key_slice_31_begin_0, end = key_slice_31_end_0, end_mask = key_slice_31_end_mask_0, x = coreml_update_state_85)[name = string("key_slice_31_cast_fp16")]; + tensor key_tail_31_begin_0 = const()[name = string("key_tail_31_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_31_end_0 = const()[name = string("key_tail_31_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_31_cast_fp16 = slice_by_index(begin = key_tail_31_begin_0, end = key_tail_31_end_0, x = key_slice_31_cast_fp16)[name = string("key_tail_31_cast_fp16")]; + int32 var_13129 = const()[name = string("op_13129"), val = int32(2)]; + bool shifted_key_31_interleave_0 = const()[name = string("shifted_key_31_interleave_0"), val = bool(false)]; + tensor shifted_key_31_cast_fp16 = concat(axis = var_13129, interleave = shifted_key_31_interleave_0, values = (key_tail_31_cast_fp16, key_states_73_cast_fp16))[name = string("shifted_key_31_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([15, 0, 0, 0])]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([16, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_84, begin_mask = model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0, end = concat_85, end_mask = model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_31_stride_0, update = shifted_key_31_cast_fp16, x = coreml_update_state_85)[name = string("model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_88")]; + tensor value_slice_31_begin_0 = const()[name = string("value_slice_31_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor value_slice_31_end_0 = const()[name = string("value_slice_31_end_0"), val = tensor([38, 1, 512, 256])]; + tensor value_slice_31_end_mask_0 = const()[name = string("value_slice_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_31_cast_fp16 = slice_by_index(begin = value_slice_31_begin_0, end = value_slice_31_end_0, end_mask = value_slice_31_end_mask_0, x = coreml_update_state_88)[name = string("value_slice_31_cast_fp16")]; + tensor value_tail_31_begin_0 = const()[name = string("value_tail_31_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_31_end_0 = const()[name = string("value_tail_31_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_31_cast_fp16 = slice_by_index(begin = value_tail_31_begin_0, end = value_tail_31_end_0, x = value_slice_31_cast_fp16)[name = string("value_tail_31_cast_fp16")]; + int32 var_13163 = const()[name = string("op_13163"), val = int32(2)]; + bool shifted_value_31_interleave_0 = const()[name = string("shifted_value_31_interleave_0"), val = bool(false)]; + tensor shifted_value_31_cast_fp16 = concat(axis = var_13163, interleave = shifted_value_31_interleave_0, values = (value_tail_31_cast_fp16, var_13003))[name = string("shifted_value_31_cast_fp16")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([37, 0, 0, 0])]; + tensor concat_87 = const()[name = string("concat_87"), val = tensor([38, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_32_stride_0, update = shifted_value_31_cast_fp16, x = coreml_update_state_88)[name = string("model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_89")]; + tensor var_13191_begin_0 = const()[name = string("op_13191_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor var_13191_end_0 = const()[name = string("op_13191_end_0"), val = tensor([16, 1, 512, 256])]; + tensor var_13191_end_mask_0 = const()[name = string("op_13191_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13191_cast_fp16 = slice_by_index(begin = var_13191_begin_0, end = var_13191_end_0, end_mask = var_13191_end_mask_0, x = coreml_update_state_89)[name = string("op_13191_cast_fp16")]; + tensor var_13198_begin_0 = const()[name = string("op_13198_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor var_13198_end_0 = const()[name = string("op_13198_end_0"), val = tensor([38, 1, 512, 256])]; + tensor var_13198_end_mask_0 = const()[name = string("op_13198_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13198_cast_fp16 = slice_by_index(begin = var_13198_begin_0, end = var_13198_end_0, end_mask = var_13198_end_mask_0, x = coreml_update_state_89)[name = string("op_13198_cast_fp16")]; + tensor var_13235 = const()[name = string("op_13235"), val = tensor([1, 4, 1, 1])]; + tensor x_293_cast_fp16 = tile(reps = var_13235, x = var_13191_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_13255 = const()[name = string("op_13255"), val = tensor([1, 4, 1, 1])]; + tensor x_299_cast_fp16 = tile(reps = var_13255, x = var_13198_cast_fp16)[name = string("x_299_cast_fp16")]; + bool var_13282_transpose_x_1 = const()[name = string("op_13282_transpose_x_1"), val = bool(false)]; + bool var_13282_transpose_y_1 = const()[name = string("op_13282_transpose_y_1"), val = bool(true)]; + tensor var_13282 = matmul(transpose_x = var_13282_transpose_x_1, transpose_y = var_13282_transpose_y_1, x = query_states_73_cast_fp16, y = x_293_cast_fp16)[name = string("op_13282")]; + fp16 var_13283_to_fp16 = const()[name = string("op_13283_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_109_cast_fp16 = mul(x = var_13282, y = var_13283_to_fp16)[name = string("attn_weights_109_cast_fp16")]; + tensor attn_weights_111_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = var_2105)[name = string("attn_weights_111_cast_fp16")]; + int32 var_13318 = const()[name = string("op_13318"), val = int32(-1)]; + tensor attn_weights_113_cast_fp16 = softmax(axis = var_13318, x = attn_weights_111_cast_fp16)[name = string("attn_weights_113_cast_fp16")]; + bool attn_output_181_transpose_x_0 = const()[name = string("attn_output_181_transpose_x_0"), val = bool(false)]; + bool attn_output_181_transpose_y_0 = const()[name = string("attn_output_181_transpose_y_0"), val = bool(false)]; + tensor attn_output_181_cast_fp16 = matmul(transpose_x = attn_output_181_transpose_x_0, transpose_y = attn_output_181_transpose_y_0, x = attn_weights_113_cast_fp16, y = x_299_cast_fp16)[name = string("attn_output_181_cast_fp16")]; + tensor var_13329_perm_0 = const()[name = string("op_13329_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13333 = const()[name = string("op_13333"), val = tensor([1, 1, 1024])]; + tensor var_13329_cast_fp16 = transpose(perm = var_13329_perm_0, x = attn_output_181_cast_fp16)[name = string("transpose_63")]; + tensor attn_output_185_cast_fp16 = reshape(shape = var_13333, x = var_13329_cast_fp16)[name = string("attn_output_185_cast_fp16")]; + tensor var_13338 = const()[name = string("op_13338"), val = tensor([0, 2, 1])]; + string var_13354_pad_type_0 = const()[name = string("op_13354_pad_type_0"), val = string("valid")]; + int32 var_13354_groups_0 = const()[name = string("op_13354_groups_0"), val = int32(1)]; + tensor var_13354_strides_0 = const()[name = string("op_13354_strides_0"), val = tensor([1])]; + tensor var_13354_pad_0 = const()[name = string("op_13354_pad_0"), val = tensor([0, 0])]; + tensor var_13354_dilations_0 = const()[name = string("op_13354_dilations_0"), val = tensor([1])]; + tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736019840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736904640))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13339_cast_fp16 = transpose(perm = var_13338, x = attn_output_185_cast_fp16)[name = string("transpose_62")]; + tensor var_13354_cast_fp16 = conv(dilations = var_13354_dilations_0, groups = var_13354_groups_0, pad = var_13354_pad_0, pad_type = var_13354_pad_type_0, strides = var_13354_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_13339_cast_fp16)[name = string("op_13354_cast_fp16")]; + tensor var_13358 = const()[name = string("op_13358"), val = tensor([0, 2, 1])]; + int32 var_13369 = const()[name = string("op_13369"), val = int32(-1)]; + fp16 const_710_promoted_to_fp16 = const()[name = string("const_710_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_301_cast_fp16 = transpose(perm = var_13358, x = var_13354_cast_fp16)[name = string("transpose_61")]; + tensor var_13371_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = const_710_promoted_to_fp16)[name = string("op_13371_cast_fp16")]; + bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; + tensor input_371_cast_fp16 = concat(axis = var_13369, interleave = input_371_interleave_0, values = (hidden_states_301_cast_fp16, var_13371_cast_fp16))[name = string("input_371_cast_fp16")]; + tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; + fp16 var_13366_to_fp16 = const()[name = string("op_13366_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_13366_to_fp16, x = input_371_cast_fp16)[name = string("normed_445_cast_fp16")]; + tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; + tensor var_13385_to_fp16 = const()[name = string("op_13385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736941568)))]; + tensor attn_output_189_cast_fp16 = mul(x = normed_447_cast_fp16, y = var_13385_to_fp16)[name = string("attn_output_189_cast_fp16")]; + tensor hidden_states_303_cast_fp16 = add(x = hidden_states_293_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; + int32 var_13398 = const()[name = string("op_13398"), val = int32(-1)]; + fp16 const_714_promoted_to_fp16 = const()[name = string("const_714_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13400_cast_fp16 = mul(x = hidden_states_303_cast_fp16, y = const_714_promoted_to_fp16)[name = string("op_13400_cast_fp16")]; + bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; + tensor input_373_cast_fp16 = concat(axis = var_13398, interleave = input_373_interleave_0, values = (hidden_states_303_cast_fp16, var_13400_cast_fp16))[name = string("input_373_cast_fp16")]; + tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; + fp16 var_13395_to_fp16 = const()[name = string("op_13395_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_13395_to_fp16, x = input_373_cast_fp16)[name = string("normed_449_cast_fp16")]; + tensor normed_451_begin_0 = const()[name = string("normed_451_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_451_end_0 = const()[name = string("normed_451_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_451_end_mask_0 = const()[name = string("normed_451_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_451_cast_fp16 = slice_by_index(begin = normed_451_begin_0, end = normed_451_end_0, end_mask = normed_451_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_451_cast_fp16")]; + tensor var_13414_to_fp16 = const()[name = string("op_13414_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736943936)))]; + tensor x_301_cast_fp16 = mul(x = normed_451_cast_fp16, y = var_13414_to_fp16)[name = string("x_301_cast_fp16")]; + tensor var_13426 = const()[name = string("op_13426"), val = tensor([0, 2, 1])]; + tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; + tensor var_13427_cast_fp16 = transpose(perm = var_13426, x = x_301_cast_fp16)[name = string("transpose_60")]; + tensor input_375_cast_fp16 = expand_dims(axes = input_375_axes_0, x = var_13427_cast_fp16)[name = string("input_375_cast_fp16")]; + string x_303_pad_type_0 = const()[name = string("x_303_pad_type_0"), val = string("valid")]; + tensor x_303_strides_0 = const()[name = string("x_303_strides_0"), val = tensor([1, 1])]; + tensor x_303_pad_0 = const()[name = string("x_303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_303_dilations_0 = const()[name = string("x_303_dilations_0"), val = tensor([1, 1])]; + int32 x_303_groups_0 = const()[name = string("x_303_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736946304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742918336))))[name = string("model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_303_cast_fp16 = conv(dilations = x_303_dilations_0, groups = x_303_groups_0, pad = x_303_pad_0, pad_type = x_303_pad_type_0, strides = x_303_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("x_303_cast_fp16")]; + string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; + tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; + tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; + int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743139584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749111616))))[name = string("model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_37_cast_fp16 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("b_37_cast_fp16")]; + string var_13452_mode_0 = const()[name = string("op_13452_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_13452_cast_fp16 = gelu(mode = var_13452_mode_0, x = x_303_cast_fp16)[name = string("op_13452_cast_fp16")]; + tensor input_377_cast_fp16 = mul(x = var_13452_cast_fp16, y = b_37_cast_fp16)[name = string("input_377_cast_fp16")]; + string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; + tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; + tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; + int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749332864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755304896))))[name = string("model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_37_cast_fp16 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_377_cast_fp16)[name = string("e_37_cast_fp16")]; + tensor var_13460_axes_0 = const()[name = string("op_13460_axes_0"), val = tensor([2])]; + tensor var_13460_cast_fp16 = squeeze(axes = var_13460_axes_0, x = e_37_cast_fp16)[name = string("op_13460_cast_fp16")]; + tensor var_13461 = const()[name = string("op_13461"), val = tensor([0, 2, 1])]; + int32 var_13472 = const()[name = string("op_13472"), val = int32(-1)]; + fp16 const_718_promoted_to_fp16 = const()[name = string("const_718_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_305_cast_fp16 = transpose(perm = var_13461, x = var_13460_cast_fp16)[name = string("transpose_59")]; + tensor var_13474_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = const_718_promoted_to_fp16)[name = string("op_13474_cast_fp16")]; + bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; + tensor input_379_cast_fp16 = concat(axis = var_13472, interleave = input_379_interleave_0, values = (hidden_states_305_cast_fp16, var_13474_cast_fp16))[name = string("input_379_cast_fp16")]; + tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; + fp16 var_13469_to_fp16 = const()[name = string("op_13469_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_13469_to_fp16, x = input_379_cast_fp16)[name = string("normed_453_cast_fp16")]; + tensor normed_455_begin_0 = const()[name = string("normed_455_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_455_end_0 = const()[name = string("normed_455_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_455_end_mask_0 = const()[name = string("normed_455_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_455_cast_fp16 = slice_by_index(begin = normed_455_begin_0, end = normed_455_end_0, end_mask = normed_455_end_mask_0, x = normed_453_cast_fp16)[name = string("normed_455_cast_fp16")]; + tensor var_13488_to_fp16 = const()[name = string("op_13488_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755341824)))]; + tensor hidden_states_307_cast_fp16 = mul(x = normed_455_cast_fp16, y = var_13488_to_fp16)[name = string("hidden_states_307_cast_fp16")]; + tensor hidden_states_309_cast_fp16 = add(x = hidden_states_303_cast_fp16, y = hidden_states_307_cast_fp16)[name = string("hidden_states_309_cast_fp16")]; + int32 var_13539 = const()[name = string("op_13539"), val = int32(-1)]; + fp16 const_722_promoted_to_fp16 = const()[name = string("const_722_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13541_cast_fp16 = mul(x = hidden_states_309_cast_fp16, y = const_722_promoted_to_fp16)[name = string("op_13541_cast_fp16")]; + bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; + tensor input_381_cast_fp16 = concat(axis = var_13539, interleave = input_381_interleave_0, values = (hidden_states_309_cast_fp16, var_13541_cast_fp16))[name = string("input_381_cast_fp16")]; + tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; + fp16 var_13536_to_fp16 = const()[name = string("op_13536_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_13536_to_fp16, x = input_381_cast_fp16)[name = string("normed_457_cast_fp16")]; + tensor normed_459_begin_0 = const()[name = string("normed_459_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_459_end_0 = const()[name = string("normed_459_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_459_end_mask_0 = const()[name = string("normed_459_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_459_cast_fp16 = slice_by_index(begin = normed_459_begin_0, end = normed_459_end_0, end_mask = normed_459_end_mask_0, x = normed_457_cast_fp16)[name = string("normed_459_cast_fp16")]; + tensor var_13555_to_fp16 = const()[name = string("op_13555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755344192)))]; + tensor hidden_states_311_cast_fp16 = mul(x = normed_459_cast_fp16, y = var_13555_to_fp16)[name = string("hidden_states_311_cast_fp16")]; + tensor var_13560 = const()[name = string("op_13560"), val = tensor([0, 2, 1])]; + tensor var_13563_axes_0 = const()[name = string("op_13563_axes_0"), val = tensor([2])]; + tensor var_13561_cast_fp16 = transpose(perm = var_13560, x = hidden_states_311_cast_fp16)[name = string("transpose_58")]; + tensor var_13563_cast_fp16 = expand_dims(axes = var_13563_axes_0, x = var_13561_cast_fp16)[name = string("op_13563_cast_fp16")]; + string var_13579_pad_type_0 = const()[name = string("op_13579_pad_type_0"), val = string("valid")]; + tensor var_13579_strides_0 = const()[name = string("op_13579_strides_0"), val = tensor([1, 1])]; + tensor var_13579_pad_0 = const()[name = string("op_13579_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13579_dilations_0 = const()[name = string("op_13579_dilations_0"), val = tensor([1, 1])]; + int32 var_13579_groups_0 = const()[name = string("op_13579_groups_0"), val = int32(1)]; + tensor var_13579 = conv(dilations = var_13579_dilations_0, groups = var_13579_groups_0, pad = var_13579_pad_0, pad_type = var_13579_pad_type_0, strides = var_13579_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_13563_cast_fp16)[name = string("op_13579")]; + tensor var_13584 = const()[name = string("op_13584"), val = tensor([1, 4, 1, 256])]; + tensor var_13585 = reshape(shape = var_13584, x = var_13579)[name = string("op_13585")]; + string var_13601_pad_type_0 = const()[name = string("op_13601_pad_type_0"), val = string("valid")]; + tensor var_13601_strides_0 = const()[name = string("op_13601_strides_0"), val = tensor([1, 1])]; + tensor var_13601_pad_0 = const()[name = string("op_13601_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13601_dilations_0 = const()[name = string("op_13601_dilations_0"), val = tensor([1, 1])]; + int32 var_13601_groups_0 = const()[name = string("op_13601_groups_0"), val = int32(1)]; + tensor var_13601 = conv(dilations = var_13601_dilations_0, groups = var_13601_groups_0, pad = var_13601_pad_0, pad_type = var_13601_pad_type_0, strides = var_13601_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_13563_cast_fp16)[name = string("op_13601")]; + tensor var_13606 = const()[name = string("op_13606"), val = tensor([1, 1, 1, 256])]; + tensor var_13607 = reshape(shape = var_13606, x = var_13601)[name = string("op_13607")]; + string var_13623_pad_type_0 = const()[name = string("op_13623_pad_type_0"), val = string("valid")]; + tensor var_13623_strides_0 = const()[name = string("op_13623_strides_0"), val = tensor([1, 1])]; + tensor var_13623_pad_0 = const()[name = string("op_13623_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13623_dilations_0 = const()[name = string("op_13623_dilations_0"), val = tensor([1, 1])]; + int32 var_13623_groups_0 = const()[name = string("op_13623_groups_0"), val = int32(1)]; + tensor var_13623 = conv(dilations = var_13623_dilations_0, groups = var_13623_groups_0, pad = var_13623_pad_0, pad_type = var_13623_pad_type_0, strides = var_13623_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_13563_cast_fp16)[name = string("op_13623")]; + tensor var_13628 = const()[name = string("op_13628"), val = tensor([1, 1, 1, 256])]; + tensor var_13629 = reshape(shape = var_13628, x = var_13623)[name = string("op_13629")]; + int32 var_13644 = const()[name = string("op_13644"), val = int32(-1)]; + fp16 const_726_promoted = const()[name = string("const_726_promoted"), val = fp16(-0x1p+0)]; + tensor var_13646 = mul(x = var_13585, y = const_726_promoted)[name = string("op_13646")]; + bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; + tensor input_385 = concat(axis = var_13644, interleave = input_385_interleave_0, values = (var_13585, var_13646))[name = string("input_385")]; + tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; + fp16 var_13641_to_fp16 = const()[name = string("op_13641_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_13641_to_fp16, x = input_385)[name = string("normed_461_cast_fp16")]; + tensor normed_463_begin_0 = const()[name = string("normed_463_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_463_end_0 = const()[name = string("normed_463_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_463_end_mask_0 = const()[name = string("normed_463_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_463 = slice_by_index(begin = normed_463_begin_0, end = normed_463_end_0, end_mask = normed_463_end_mask_0, x = normed_461_cast_fp16)[name = string("normed_463")]; + tensor var_13660_to_fp16 = const()[name = string("op_13660_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755346560)))]; + tensor q_39_cast_fp16 = mul(x = normed_463, y = var_13660_to_fp16)[name = string("q_39_cast_fp16")]; + int32 var_13671 = const()[name = string("op_13671"), val = int32(-1)]; + fp16 const_730_promoted = const()[name = string("const_730_promoted"), val = fp16(-0x1p+0)]; + tensor var_13673 = mul(x = var_13607, y = const_730_promoted)[name = string("op_13673")]; + bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; + tensor input_387 = concat(axis = var_13671, interleave = input_387_interleave_0, values = (var_13607, var_13673))[name = string("input_387")]; + tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; + fp16 var_13668_to_fp16 = const()[name = string("op_13668_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_13668_to_fp16, x = input_387)[name = string("normed_465_cast_fp16")]; + tensor normed_467_begin_0 = const()[name = string("normed_467_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_467_end_0 = const()[name = string("normed_467_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_467_end_mask_0 = const()[name = string("normed_467_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_467 = slice_by_index(begin = normed_467_begin_0, end = normed_467_end_0, end_mask = normed_467_end_mask_0, x = normed_465_cast_fp16)[name = string("normed_467")]; + tensor var_13687_to_fp16 = const()[name = string("op_13687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755347136)))]; + tensor k_39_cast_fp16 = mul(x = normed_467, y = var_13687_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_13689_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13689_cast_fp16")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; + fp16 const_736_promoted_to_fp16 = const()[name = string("const_736_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13710_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_736_promoted_to_fp16)[name = string("op_13710_cast_fp16")]; + int32 var_13712 = const()[name = string("op_13712"), val = int32(-1)]; + bool var_13713_interleave_0 = const()[name = string("op_13713_interleave_0"), val = bool(false)]; + tensor var_13713_cast_fp16 = concat(axis = var_13712, interleave = var_13713_interleave_0, values = (var_13710_cast_fp16, x1_77_cast_fp16))[name = string("op_13713_cast_fp16")]; + tensor var_13714_cast_fp16 = mul(x = var_13713_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13714_cast_fp16")]; + tensor query_states_77_cast_fp16 = add(x = var_13689_cast_fp16, y = var_13714_cast_fp16)[name = string("query_states_77_cast_fp16")]; + tensor var_13717_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13717_cast_fp16")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; + fp16 const_739_promoted_to_fp16 = const()[name = string("const_739_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13738_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_739_promoted_to_fp16)[name = string("op_13738_cast_fp16")]; + int32 var_13740 = const()[name = string("op_13740"), val = int32(-1)]; + bool var_13741_interleave_0 = const()[name = string("op_13741_interleave_0"), val = bool(false)]; + tensor var_13741_cast_fp16 = concat(axis = var_13740, interleave = var_13741_interleave_0, values = (var_13738_cast_fp16, x1_79_cast_fp16))[name = string("op_13741_cast_fp16")]; + tensor var_13742_cast_fp16 = mul(x = var_13741_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13742_cast_fp16")]; + tensor key_states_77_cast_fp16 = add(x = var_13717_cast_fp16, y = var_13742_cast_fp16)[name = string("key_states_77_cast_fp16")]; + tensor key_slice_33_begin_0 = const()[name = string("key_slice_33_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor key_slice_33_end_0 = const()[name = string("key_slice_33_end_0"), val = tensor([17, 1, 512, 256])]; + tensor key_slice_33_end_mask_0 = const()[name = string("key_slice_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_33_cast_fp16 = slice_by_index(begin = key_slice_33_begin_0, end = key_slice_33_end_0, end_mask = key_slice_33_end_mask_0, x = coreml_update_state_89)[name = string("key_slice_33_cast_fp16")]; + tensor key_tail_33_begin_0 = const()[name = string("key_tail_33_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_33_end_0 = const()[name = string("key_tail_33_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_33_cast_fp16 = slice_by_index(begin = key_tail_33_begin_0, end = key_tail_33_end_0, x = key_slice_33_cast_fp16)[name = string("key_tail_33_cast_fp16")]; + int32 var_13755 = const()[name = string("op_13755"), val = int32(2)]; + bool shifted_key_33_interleave_0 = const()[name = string("shifted_key_33_interleave_0"), val = bool(false)]; + tensor shifted_key_33_cast_fp16 = concat(axis = var_13755, interleave = shifted_key_33_interleave_0, values = (key_tail_33_cast_fp16, key_states_77_cast_fp16))[name = string("shifted_key_33_cast_fp16")]; + tensor concat_88 = const()[name = string("concat_88"), val = tensor([16, 0, 0, 0])]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([17, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_88, begin_mask = model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0, end = concat_89, end_mask = model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_33_stride_0, update = shifted_key_33_cast_fp16, x = coreml_update_state_89)[name = string("model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_90")]; + tensor value_slice_33_begin_0 = const()[name = string("value_slice_33_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor value_slice_33_end_0 = const()[name = string("value_slice_33_end_0"), val = tensor([39, 1, 512, 256])]; + tensor value_slice_33_end_mask_0 = const()[name = string("value_slice_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_33_cast_fp16 = slice_by_index(begin = value_slice_33_begin_0, end = value_slice_33_end_0, end_mask = value_slice_33_end_mask_0, x = coreml_update_state_90)[name = string("value_slice_33_cast_fp16")]; + tensor value_tail_33_begin_0 = const()[name = string("value_tail_33_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_33_end_0 = const()[name = string("value_tail_33_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_33_cast_fp16 = slice_by_index(begin = value_tail_33_begin_0, end = value_tail_33_end_0, x = value_slice_33_cast_fp16)[name = string("value_tail_33_cast_fp16")]; + int32 var_13789 = const()[name = string("op_13789"), val = int32(2)]; + bool shifted_value_33_interleave_0 = const()[name = string("shifted_value_33_interleave_0"), val = bool(false)]; + tensor shifted_value_33_cast_fp16 = concat(axis = var_13789, interleave = shifted_value_33_interleave_0, values = (value_tail_33_cast_fp16, var_13629))[name = string("shifted_value_33_cast_fp16")]; + tensor concat_90 = const()[name = string("concat_90"), val = tensor([38, 0, 0, 0])]; + tensor concat_91 = const()[name = string("concat_91"), val = tensor([39, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_90, begin_mask = model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0, end = concat_91, end_mask = model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_34_stride_0, update = shifted_value_33_cast_fp16, x = coreml_update_state_90)[name = string("model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_91")]; + tensor var_13817_begin_0 = const()[name = string("op_13817_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_13817_end_0 = const()[name = string("op_13817_end_0"), val = tensor([17, 1, 512, 256])]; + tensor var_13817_end_mask_0 = const()[name = string("op_13817_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13817_cast_fp16 = slice_by_index(begin = var_13817_begin_0, end = var_13817_end_0, end_mask = var_13817_end_mask_0, x = coreml_update_state_91)[name = string("op_13817_cast_fp16")]; + tensor var_13824_begin_0 = const()[name = string("op_13824_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor var_13824_end_0 = const()[name = string("op_13824_end_0"), val = tensor([39, 1, 512, 256])]; + tensor var_13824_end_mask_0 = const()[name = string("op_13824_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13824_cast_fp16 = slice_by_index(begin = var_13824_begin_0, end = var_13824_end_0, end_mask = var_13824_end_mask_0, x = coreml_update_state_91)[name = string("op_13824_cast_fp16")]; + tensor var_13861 = const()[name = string("op_13861"), val = tensor([1, 4, 1, 1])]; + tensor x_309_cast_fp16 = tile(reps = var_13861, x = var_13817_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor var_13881 = const()[name = string("op_13881"), val = tensor([1, 4, 1, 1])]; + tensor x_315_cast_fp16 = tile(reps = var_13881, x = var_13824_cast_fp16)[name = string("x_315_cast_fp16")]; + bool var_13908_transpose_x_1 = const()[name = string("op_13908_transpose_x_1"), val = bool(false)]; + bool var_13908_transpose_y_1 = const()[name = string("op_13908_transpose_y_1"), val = bool(true)]; + tensor var_13908 = matmul(transpose_x = var_13908_transpose_x_1, transpose_y = var_13908_transpose_y_1, x = query_states_77_cast_fp16, y = x_309_cast_fp16)[name = string("op_13908")]; + fp16 var_13909_to_fp16 = const()[name = string("op_13909_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_115_cast_fp16 = mul(x = var_13908, y = var_13909_to_fp16)[name = string("attn_weights_115_cast_fp16")]; + tensor attn_weights_117_cast_fp16 = add(x = attn_weights_115_cast_fp16, y = var_2105)[name = string("attn_weights_117_cast_fp16")]; + int32 var_13944 = const()[name = string("op_13944"), val = int32(-1)]; + tensor attn_weights_119_cast_fp16 = softmax(axis = var_13944, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; + bool attn_output_191_transpose_x_0 = const()[name = string("attn_output_191_transpose_x_0"), val = bool(false)]; + bool attn_output_191_transpose_y_0 = const()[name = string("attn_output_191_transpose_y_0"), val = bool(false)]; + tensor attn_output_191_cast_fp16 = matmul(transpose_x = attn_output_191_transpose_x_0, transpose_y = attn_output_191_transpose_y_0, x = attn_weights_119_cast_fp16, y = x_315_cast_fp16)[name = string("attn_output_191_cast_fp16")]; + tensor var_13955_perm_0 = const()[name = string("op_13955_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13959 = const()[name = string("op_13959"), val = tensor([1, 1, 1024])]; + tensor var_13955_cast_fp16 = transpose(perm = var_13955_perm_0, x = attn_output_191_cast_fp16)[name = string("transpose_57")]; + tensor attn_output_195_cast_fp16 = reshape(shape = var_13959, x = var_13955_cast_fp16)[name = string("attn_output_195_cast_fp16")]; + tensor var_13964 = const()[name = string("op_13964"), val = tensor([0, 2, 1])]; + string var_13980_pad_type_0 = const()[name = string("op_13980_pad_type_0"), val = string("valid")]; + int32 var_13980_groups_0 = const()[name = string("op_13980_groups_0"), val = int32(1)]; + tensor var_13980_strides_0 = const()[name = string("op_13980_strides_0"), val = tensor([1])]; + tensor var_13980_pad_0 = const()[name = string("op_13980_pad_0"), val = tensor([0, 0])]; + tensor var_13980_dilations_0 = const()[name = string("op_13980_dilations_0"), val = tensor([1])]; + tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755347712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756232512))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13965_cast_fp16 = transpose(perm = var_13964, x = attn_output_195_cast_fp16)[name = string("transpose_56")]; + tensor var_13980_cast_fp16 = conv(dilations = var_13980_dilations_0, groups = var_13980_groups_0, pad = var_13980_pad_0, pad_type = var_13980_pad_type_0, strides = var_13980_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_13965_cast_fp16)[name = string("op_13980_cast_fp16")]; + tensor var_13984 = const()[name = string("op_13984"), val = tensor([0, 2, 1])]; + int32 var_13995 = const()[name = string("op_13995"), val = int32(-1)]; + fp16 const_748_promoted_to_fp16 = const()[name = string("const_748_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_317_cast_fp16 = transpose(perm = var_13984, x = var_13980_cast_fp16)[name = string("transpose_55")]; + tensor var_13997_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = const_748_promoted_to_fp16)[name = string("op_13997_cast_fp16")]; + bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; + tensor input_391_cast_fp16 = concat(axis = var_13995, interleave = input_391_interleave_0, values = (hidden_states_317_cast_fp16, var_13997_cast_fp16))[name = string("input_391_cast_fp16")]; + tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; + fp16 var_13992_to_fp16 = const()[name = string("op_13992_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_13992_to_fp16, x = input_391_cast_fp16)[name = string("normed_469_cast_fp16")]; + tensor normed_471_begin_0 = const()[name = string("normed_471_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_471_end_0 = const()[name = string("normed_471_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_471_end_mask_0 = const()[name = string("normed_471_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_471_cast_fp16 = slice_by_index(begin = normed_471_begin_0, end = normed_471_end_0, end_mask = normed_471_end_mask_0, x = normed_469_cast_fp16)[name = string("normed_471_cast_fp16")]; + tensor var_14011_to_fp16 = const()[name = string("op_14011_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756269440)))]; + tensor attn_output_199_cast_fp16 = mul(x = normed_471_cast_fp16, y = var_14011_to_fp16)[name = string("attn_output_199_cast_fp16")]; + tensor hidden_states_319_cast_fp16 = add(x = hidden_states_309_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; + int32 var_14024 = const()[name = string("op_14024"), val = int32(-1)]; + fp16 const_752_promoted_to_fp16 = const()[name = string("const_752_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14026_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = const_752_promoted_to_fp16)[name = string("op_14026_cast_fp16")]; + bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; + tensor input_393_cast_fp16 = concat(axis = var_14024, interleave = input_393_interleave_0, values = (hidden_states_319_cast_fp16, var_14026_cast_fp16))[name = string("input_393_cast_fp16")]; + tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; + fp16 var_14021_to_fp16 = const()[name = string("op_14021_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_14021_to_fp16, x = input_393_cast_fp16)[name = string("normed_473_cast_fp16")]; + tensor normed_475_begin_0 = const()[name = string("normed_475_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_475_end_0 = const()[name = string("normed_475_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_475_end_mask_0 = const()[name = string("normed_475_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_475_cast_fp16 = slice_by_index(begin = normed_475_begin_0, end = normed_475_end_0, end_mask = normed_475_end_mask_0, x = normed_473_cast_fp16)[name = string("normed_475_cast_fp16")]; + tensor var_14040_to_fp16 = const()[name = string("op_14040_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756271808)))]; + tensor x_317_cast_fp16 = mul(x = normed_475_cast_fp16, y = var_14040_to_fp16)[name = string("x_317_cast_fp16")]; + tensor var_14052 = const()[name = string("op_14052"), val = tensor([0, 2, 1])]; + tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; + tensor var_14053_cast_fp16 = transpose(perm = var_14052, x = x_317_cast_fp16)[name = string("transpose_54")]; + tensor input_395_cast_fp16 = expand_dims(axes = input_395_axes_0, x = var_14053_cast_fp16)[name = string("input_395_cast_fp16")]; + string x_319_pad_type_0 = const()[name = string("x_319_pad_type_0"), val = string("valid")]; + tensor x_319_strides_0 = const()[name = string("x_319_strides_0"), val = tensor([1, 1])]; + tensor x_319_pad_0 = const()[name = string("x_319_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_319_dilations_0 = const()[name = string("x_319_dilations_0"), val = tensor([1, 1])]; + int32 x_319_groups_0 = const()[name = string("x_319_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756274176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(762246208))))[name = string("model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_319_cast_fp16 = conv(dilations = x_319_dilations_0, groups = x_319_groups_0, pad = x_319_pad_0, pad_type = x_319_pad_type_0, strides = x_319_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("x_319_cast_fp16")]; + string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; + tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; + tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; + int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(762467456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768439488))))[name = string("model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_39_cast_fp16 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("b_39_cast_fp16")]; + string var_14078_mode_0 = const()[name = string("op_14078_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_14078_cast_fp16 = gelu(mode = var_14078_mode_0, x = x_319_cast_fp16)[name = string("op_14078_cast_fp16")]; + tensor input_397_cast_fp16 = mul(x = var_14078_cast_fp16, y = b_39_cast_fp16)[name = string("input_397_cast_fp16")]; + string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; + tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; + tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; + int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768660736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774632768))))[name = string("model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_39_cast_fp16 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("e_39_cast_fp16")]; + tensor var_14086_axes_0 = const()[name = string("op_14086_axes_0"), val = tensor([2])]; + tensor var_14086_cast_fp16 = squeeze(axes = var_14086_axes_0, x = e_39_cast_fp16)[name = string("op_14086_cast_fp16")]; + tensor var_14087 = const()[name = string("op_14087"), val = tensor([0, 2, 1])]; + int32 var_14098 = const()[name = string("op_14098"), val = int32(-1)]; + fp16 const_756_promoted_to_fp16 = const()[name = string("const_756_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_321_cast_fp16 = transpose(perm = var_14087, x = var_14086_cast_fp16)[name = string("transpose_53")]; + tensor var_14100_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = const_756_promoted_to_fp16)[name = string("op_14100_cast_fp16")]; + bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; + tensor input_399_cast_fp16 = concat(axis = var_14098, interleave = input_399_interleave_0, values = (hidden_states_321_cast_fp16, var_14100_cast_fp16))[name = string("input_399_cast_fp16")]; + tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; + fp16 var_14095_to_fp16 = const()[name = string("op_14095_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_14095_to_fp16, x = input_399_cast_fp16)[name = string("normed_477_cast_fp16")]; + tensor normed_479_begin_0 = const()[name = string("normed_479_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_479_end_0 = const()[name = string("normed_479_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_479_end_mask_0 = const()[name = string("normed_479_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_479_cast_fp16 = slice_by_index(begin = normed_479_begin_0, end = normed_479_end_0, end_mask = normed_479_end_mask_0, x = normed_477_cast_fp16)[name = string("normed_479_cast_fp16")]; + tensor var_14114_to_fp16 = const()[name = string("op_14114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774669696)))]; + tensor hidden_states_323_cast_fp16 = mul(x = normed_479_cast_fp16, y = var_14114_to_fp16)[name = string("hidden_states_323_cast_fp16")]; + tensor hidden_states_325_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = hidden_states_323_cast_fp16)[name = string("hidden_states_325_cast_fp16")]; + int32 var_14165 = const()[name = string("op_14165"), val = int32(-1)]; + fp16 const_760_promoted_to_fp16 = const()[name = string("const_760_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14167_cast_fp16 = mul(x = hidden_states_325_cast_fp16, y = const_760_promoted_to_fp16)[name = string("op_14167_cast_fp16")]; + bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; + tensor input_401_cast_fp16 = concat(axis = var_14165, interleave = input_401_interleave_0, values = (hidden_states_325_cast_fp16, var_14167_cast_fp16))[name = string("input_401_cast_fp16")]; + tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; + fp16 var_14162_to_fp16 = const()[name = string("op_14162_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_14162_to_fp16, x = input_401_cast_fp16)[name = string("normed_481_cast_fp16")]; + tensor normed_483_begin_0 = const()[name = string("normed_483_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_483_end_0 = const()[name = string("normed_483_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_483_end_mask_0 = const()[name = string("normed_483_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_483_cast_fp16 = slice_by_index(begin = normed_483_begin_0, end = normed_483_end_0, end_mask = normed_483_end_mask_0, x = normed_481_cast_fp16)[name = string("normed_483_cast_fp16")]; + tensor var_14181_to_fp16 = const()[name = string("op_14181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774672064)))]; + tensor hidden_states_327_cast_fp16 = mul(x = normed_483_cast_fp16, y = var_14181_to_fp16)[name = string("hidden_states_327_cast_fp16")]; + tensor var_14186 = const()[name = string("op_14186"), val = tensor([0, 2, 1])]; + tensor var_14189_axes_0 = const()[name = string("op_14189_axes_0"), val = tensor([2])]; + tensor var_14187_cast_fp16 = transpose(perm = var_14186, x = hidden_states_327_cast_fp16)[name = string("transpose_52")]; + tensor var_14189_cast_fp16 = expand_dims(axes = var_14189_axes_0, x = var_14187_cast_fp16)[name = string("op_14189_cast_fp16")]; + string var_14205_pad_type_0 = const()[name = string("op_14205_pad_type_0"), val = string("valid")]; + tensor var_14205_strides_0 = const()[name = string("op_14205_strides_0"), val = tensor([1, 1])]; + tensor var_14205_pad_0 = const()[name = string("op_14205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14205_dilations_0 = const()[name = string("op_14205_dilations_0"), val = tensor([1, 1])]; + int32 var_14205_groups_0 = const()[name = string("op_14205_groups_0"), val = int32(1)]; + tensor var_14205 = conv(dilations = var_14205_dilations_0, groups = var_14205_groups_0, pad = var_14205_pad_0, pad_type = var_14205_pad_type_0, strides = var_14205_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_14189_cast_fp16)[name = string("op_14205")]; + tensor var_14210 = const()[name = string("op_14210"), val = tensor([1, 4, 1, 256])]; + tensor var_14211 = reshape(shape = var_14210, x = var_14205)[name = string("op_14211")]; + string var_14227_pad_type_0 = const()[name = string("op_14227_pad_type_0"), val = string("valid")]; + tensor var_14227_strides_0 = const()[name = string("op_14227_strides_0"), val = tensor([1, 1])]; + tensor var_14227_pad_0 = const()[name = string("op_14227_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14227_dilations_0 = const()[name = string("op_14227_dilations_0"), val = tensor([1, 1])]; + int32 var_14227_groups_0 = const()[name = string("op_14227_groups_0"), val = int32(1)]; + tensor var_14227 = conv(dilations = var_14227_dilations_0, groups = var_14227_groups_0, pad = var_14227_pad_0, pad_type = var_14227_pad_type_0, strides = var_14227_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_14189_cast_fp16)[name = string("op_14227")]; + tensor var_14232 = const()[name = string("op_14232"), val = tensor([1, 1, 1, 256])]; + tensor var_14233 = reshape(shape = var_14232, x = var_14227)[name = string("op_14233")]; + string var_14249_pad_type_0 = const()[name = string("op_14249_pad_type_0"), val = string("valid")]; + tensor var_14249_strides_0 = const()[name = string("op_14249_strides_0"), val = tensor([1, 1])]; + tensor var_14249_pad_0 = const()[name = string("op_14249_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14249_dilations_0 = const()[name = string("op_14249_dilations_0"), val = tensor([1, 1])]; + int32 var_14249_groups_0 = const()[name = string("op_14249_groups_0"), val = int32(1)]; + tensor var_14249 = conv(dilations = var_14249_dilations_0, groups = var_14249_groups_0, pad = var_14249_pad_0, pad_type = var_14249_pad_type_0, strides = var_14249_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_14189_cast_fp16)[name = string("op_14249")]; + tensor var_14254 = const()[name = string("op_14254"), val = tensor([1, 1, 1, 256])]; + tensor var_14255 = reshape(shape = var_14254, x = var_14249)[name = string("op_14255")]; + int32 var_14270 = const()[name = string("op_14270"), val = int32(-1)]; + fp16 const_764_promoted = const()[name = string("const_764_promoted"), val = fp16(-0x1p+0)]; + tensor var_14272 = mul(x = var_14211, y = const_764_promoted)[name = string("op_14272")]; + bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; + tensor input_405 = concat(axis = var_14270, interleave = input_405_interleave_0, values = (var_14211, var_14272))[name = string("input_405")]; + tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; + fp16 var_14267_to_fp16 = const()[name = string("op_14267_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_14267_to_fp16, x = input_405)[name = string("normed_485_cast_fp16")]; + tensor normed_487_begin_0 = const()[name = string("normed_487_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_487_end_0 = const()[name = string("normed_487_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_487_end_mask_0 = const()[name = string("normed_487_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_487 = slice_by_index(begin = normed_487_begin_0, end = normed_487_end_0, end_mask = normed_487_end_mask_0, x = normed_485_cast_fp16)[name = string("normed_487")]; + tensor var_14286_to_fp16 = const()[name = string("op_14286_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774674432)))]; + tensor q_41_cast_fp16 = mul(x = normed_487, y = var_14286_to_fp16)[name = string("q_41_cast_fp16")]; + int32 var_14297 = const()[name = string("op_14297"), val = int32(-1)]; + fp16 const_768_promoted = const()[name = string("const_768_promoted"), val = fp16(-0x1p+0)]; + tensor var_14299 = mul(x = var_14233, y = const_768_promoted)[name = string("op_14299")]; + bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; + tensor input_407 = concat(axis = var_14297, interleave = input_407_interleave_0, values = (var_14233, var_14299))[name = string("input_407")]; + tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; + fp16 var_14294_to_fp16 = const()[name = string("op_14294_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_14294_to_fp16, x = input_407)[name = string("normed_489_cast_fp16")]; + tensor normed_491_begin_0 = const()[name = string("normed_491_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_491_end_0 = const()[name = string("normed_491_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_491_end_mask_0 = const()[name = string("normed_491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_491 = slice_by_index(begin = normed_491_begin_0, end = normed_491_end_0, end_mask = normed_491_end_mask_0, x = normed_489_cast_fp16)[name = string("normed_491")]; + tensor var_14313_to_fp16 = const()[name = string("op_14313_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774675008)))]; + tensor k_41_cast_fp16 = mul(x = normed_491, y = var_14313_to_fp16)[name = string("k_41_cast_fp16")]; + tensor var_14315_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14315_cast_fp16")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; + fp16 const_774_promoted_to_fp16 = const()[name = string("const_774_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14336_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_774_promoted_to_fp16)[name = string("op_14336_cast_fp16")]; + int32 var_14338 = const()[name = string("op_14338"), val = int32(-1)]; + bool var_14339_interleave_0 = const()[name = string("op_14339_interleave_0"), val = bool(false)]; + tensor var_14339_cast_fp16 = concat(axis = var_14338, interleave = var_14339_interleave_0, values = (var_14336_cast_fp16, x1_81_cast_fp16))[name = string("op_14339_cast_fp16")]; + tensor var_14340_cast_fp16 = mul(x = var_14339_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14340_cast_fp16")]; + tensor query_states_81_cast_fp16 = add(x = var_14315_cast_fp16, y = var_14340_cast_fp16)[name = string("query_states_81_cast_fp16")]; + tensor var_14343_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14343_cast_fp16")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; + fp16 const_777_promoted_to_fp16 = const()[name = string("const_777_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14364_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_777_promoted_to_fp16)[name = string("op_14364_cast_fp16")]; + int32 var_14366 = const()[name = string("op_14366"), val = int32(-1)]; + bool var_14367_interleave_0 = const()[name = string("op_14367_interleave_0"), val = bool(false)]; + tensor var_14367_cast_fp16 = concat(axis = var_14366, interleave = var_14367_interleave_0, values = (var_14364_cast_fp16, x1_83_cast_fp16))[name = string("op_14367_cast_fp16")]; + tensor var_14368_cast_fp16 = mul(x = var_14367_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14368_cast_fp16")]; + tensor key_states_81_cast_fp16 = add(x = var_14343_cast_fp16, y = var_14368_cast_fp16)[name = string("key_states_81_cast_fp16")]; + tensor key_slice_35_begin_0 = const()[name = string("key_slice_35_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor key_slice_35_end_0 = const()[name = string("key_slice_35_end_0"), val = tensor([18, 1, 512, 256])]; + tensor key_slice_35_end_mask_0 = const()[name = string("key_slice_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_35_cast_fp16 = slice_by_index(begin = key_slice_35_begin_0, end = key_slice_35_end_0, end_mask = key_slice_35_end_mask_0, x = coreml_update_state_91)[name = string("key_slice_35_cast_fp16")]; + tensor key_tail_35_begin_0 = const()[name = string("key_tail_35_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_35_end_0 = const()[name = string("key_tail_35_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_35_cast_fp16 = slice_by_index(begin = key_tail_35_begin_0, end = key_tail_35_end_0, x = key_slice_35_cast_fp16)[name = string("key_tail_35_cast_fp16")]; + int32 var_14381 = const()[name = string("op_14381"), val = int32(2)]; + bool shifted_key_35_interleave_0 = const()[name = string("shifted_key_35_interleave_0"), val = bool(false)]; + tensor shifted_key_35_cast_fp16 = concat(axis = var_14381, interleave = shifted_key_35_interleave_0, values = (key_tail_35_cast_fp16, key_states_81_cast_fp16))[name = string("shifted_key_35_cast_fp16")]; + tensor concat_92 = const()[name = string("concat_92"), val = tensor([17, 0, 0, 0])]; + tensor concat_93 = const()[name = string("concat_93"), val = tensor([18, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_35_stride_0, update = shifted_key_35_cast_fp16, x = coreml_update_state_91)[name = string("model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_92")]; + tensor value_slice_35_begin_0 = const()[name = string("value_slice_35_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor value_slice_35_end_0 = const()[name = string("value_slice_35_end_0"), val = tensor([40, 1, 512, 256])]; + tensor value_slice_35_end_mask_0 = const()[name = string("value_slice_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_35_cast_fp16 = slice_by_index(begin = value_slice_35_begin_0, end = value_slice_35_end_0, end_mask = value_slice_35_end_mask_0, x = coreml_update_state_92)[name = string("value_slice_35_cast_fp16")]; + tensor value_tail_35_begin_0 = const()[name = string("value_tail_35_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_35_end_0 = const()[name = string("value_tail_35_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_35_cast_fp16 = slice_by_index(begin = value_tail_35_begin_0, end = value_tail_35_end_0, x = value_slice_35_cast_fp16)[name = string("value_tail_35_cast_fp16")]; + int32 var_14415 = const()[name = string("op_14415"), val = int32(2)]; + bool shifted_value_35_interleave_0 = const()[name = string("shifted_value_35_interleave_0"), val = bool(false)]; + tensor shifted_value_35_cast_fp16 = concat(axis = var_14415, interleave = shifted_value_35_interleave_0, values = (value_tail_35_cast_fp16, var_14255))[name = string("shifted_value_35_cast_fp16")]; + tensor concat_94 = const()[name = string("concat_94"), val = tensor([39, 0, 0, 0])]; + tensor concat_95 = const()[name = string("concat_95"), val = tensor([40, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_94, begin_mask = model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0, end = concat_95, end_mask = model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_36_stride_0, update = shifted_value_35_cast_fp16, x = coreml_update_state_92)[name = string("model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_93")]; + tensor var_14443_begin_0 = const()[name = string("op_14443_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_14443_end_0 = const()[name = string("op_14443_end_0"), val = tensor([18, 1, 512, 256])]; + tensor var_14443_end_mask_0 = const()[name = string("op_14443_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14443_cast_fp16 = slice_by_index(begin = var_14443_begin_0, end = var_14443_end_0, end_mask = var_14443_end_mask_0, x = coreml_update_state_93)[name = string("op_14443_cast_fp16")]; + tensor var_14450_begin_0 = const()[name = string("op_14450_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor var_14450_end_0 = const()[name = string("op_14450_end_0"), val = tensor([40, 1, 512, 256])]; + tensor var_14450_end_mask_0 = const()[name = string("op_14450_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14450_cast_fp16 = slice_by_index(begin = var_14450_begin_0, end = var_14450_end_0, end_mask = var_14450_end_mask_0, x = coreml_update_state_93)[name = string("op_14450_cast_fp16")]; + tensor var_14487 = const()[name = string("op_14487"), val = tensor([1, 4, 1, 1])]; + tensor x_325_cast_fp16 = tile(reps = var_14487, x = var_14443_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_14507 = const()[name = string("op_14507"), val = tensor([1, 4, 1, 1])]; + tensor x_331_cast_fp16 = tile(reps = var_14507, x = var_14450_cast_fp16)[name = string("x_331_cast_fp16")]; + bool var_14534_transpose_x_1 = const()[name = string("op_14534_transpose_x_1"), val = bool(false)]; + bool var_14534_transpose_y_1 = const()[name = string("op_14534_transpose_y_1"), val = bool(true)]; + tensor var_14534 = matmul(transpose_x = var_14534_transpose_x_1, transpose_y = var_14534_transpose_y_1, x = query_states_81_cast_fp16, y = x_325_cast_fp16)[name = string("op_14534")]; + fp16 var_14535_to_fp16 = const()[name = string("op_14535_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_121_cast_fp16 = mul(x = var_14534, y = var_14535_to_fp16)[name = string("attn_weights_121_cast_fp16")]; + tensor attn_weights_123_cast_fp16 = add(x = attn_weights_121_cast_fp16, y = var_2105)[name = string("attn_weights_123_cast_fp16")]; + int32 var_14570 = const()[name = string("op_14570"), val = int32(-1)]; + tensor attn_weights_125_cast_fp16 = softmax(axis = var_14570, x = attn_weights_123_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; + bool attn_output_201_transpose_x_0 = const()[name = string("attn_output_201_transpose_x_0"), val = bool(false)]; + bool attn_output_201_transpose_y_0 = const()[name = string("attn_output_201_transpose_y_0"), val = bool(false)]; + tensor attn_output_201_cast_fp16 = matmul(transpose_x = attn_output_201_transpose_x_0, transpose_y = attn_output_201_transpose_y_0, x = attn_weights_125_cast_fp16, y = x_331_cast_fp16)[name = string("attn_output_201_cast_fp16")]; + tensor var_14581_perm_0 = const()[name = string("op_14581_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14585 = const()[name = string("op_14585"), val = tensor([1, 1, 1024])]; + tensor var_14581_cast_fp16 = transpose(perm = var_14581_perm_0, x = attn_output_201_cast_fp16)[name = string("transpose_51")]; + tensor attn_output_205_cast_fp16 = reshape(shape = var_14585, x = var_14581_cast_fp16)[name = string("attn_output_205_cast_fp16")]; + tensor var_14590 = const()[name = string("op_14590"), val = tensor([0, 2, 1])]; + string var_14606_pad_type_0 = const()[name = string("op_14606_pad_type_0"), val = string("valid")]; + int32 var_14606_groups_0 = const()[name = string("op_14606_groups_0"), val = int32(1)]; + tensor var_14606_strides_0 = const()[name = string("op_14606_strides_0"), val = tensor([1])]; + tensor var_14606_pad_0 = const()[name = string("op_14606_pad_0"), val = tensor([0, 0])]; + tensor var_14606_dilations_0 = const()[name = string("op_14606_dilations_0"), val = tensor([1])]; + tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774675584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775560384))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14591_cast_fp16 = transpose(perm = var_14590, x = attn_output_205_cast_fp16)[name = string("transpose_50")]; + tensor var_14606_cast_fp16 = conv(dilations = var_14606_dilations_0, groups = var_14606_groups_0, pad = var_14606_pad_0, pad_type = var_14606_pad_type_0, strides = var_14606_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_14591_cast_fp16)[name = string("op_14606_cast_fp16")]; + tensor var_14610 = const()[name = string("op_14610"), val = tensor([0, 2, 1])]; + int32 var_14621 = const()[name = string("op_14621"), val = int32(-1)]; + fp16 const_786_promoted_to_fp16 = const()[name = string("const_786_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_333_cast_fp16 = transpose(perm = var_14610, x = var_14606_cast_fp16)[name = string("transpose_49")]; + tensor var_14623_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = const_786_promoted_to_fp16)[name = string("op_14623_cast_fp16")]; + bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; + tensor input_411_cast_fp16 = concat(axis = var_14621, interleave = input_411_interleave_0, values = (hidden_states_333_cast_fp16, var_14623_cast_fp16))[name = string("input_411_cast_fp16")]; + tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; + fp16 var_14618_to_fp16 = const()[name = string("op_14618_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_14618_to_fp16, x = input_411_cast_fp16)[name = string("normed_493_cast_fp16")]; + tensor normed_495_begin_0 = const()[name = string("normed_495_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_495_end_0 = const()[name = string("normed_495_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_495_end_mask_0 = const()[name = string("normed_495_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_495_cast_fp16 = slice_by_index(begin = normed_495_begin_0, end = normed_495_end_0, end_mask = normed_495_end_mask_0, x = normed_493_cast_fp16)[name = string("normed_495_cast_fp16")]; + tensor var_14637_to_fp16 = const()[name = string("op_14637_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775597312)))]; + tensor attn_output_209_cast_fp16 = mul(x = normed_495_cast_fp16, y = var_14637_to_fp16)[name = string("attn_output_209_cast_fp16")]; + tensor hidden_states_335_cast_fp16 = add(x = hidden_states_325_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_335_cast_fp16")]; + int32 var_14650 = const()[name = string("op_14650"), val = int32(-1)]; + fp16 const_790_promoted_to_fp16 = const()[name = string("const_790_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14652_cast_fp16 = mul(x = hidden_states_335_cast_fp16, y = const_790_promoted_to_fp16)[name = string("op_14652_cast_fp16")]; + bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; + tensor input_413_cast_fp16 = concat(axis = var_14650, interleave = input_413_interleave_0, values = (hidden_states_335_cast_fp16, var_14652_cast_fp16))[name = string("input_413_cast_fp16")]; + tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; + fp16 var_14647_to_fp16 = const()[name = string("op_14647_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_14647_to_fp16, x = input_413_cast_fp16)[name = string("normed_497_cast_fp16")]; + tensor normed_499_begin_0 = const()[name = string("normed_499_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_499_end_0 = const()[name = string("normed_499_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_499_end_mask_0 = const()[name = string("normed_499_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_499_cast_fp16 = slice_by_index(begin = normed_499_begin_0, end = normed_499_end_0, end_mask = normed_499_end_mask_0, x = normed_497_cast_fp16)[name = string("normed_499_cast_fp16")]; + tensor var_14666_to_fp16 = const()[name = string("op_14666_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775599680)))]; + tensor x_333_cast_fp16 = mul(x = normed_499_cast_fp16, y = var_14666_to_fp16)[name = string("x_333_cast_fp16")]; + tensor var_14678 = const()[name = string("op_14678"), val = tensor([0, 2, 1])]; + tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; + tensor var_14679_cast_fp16 = transpose(perm = var_14678, x = x_333_cast_fp16)[name = string("transpose_48")]; + tensor input_415_cast_fp16 = expand_dims(axes = input_415_axes_0, x = var_14679_cast_fp16)[name = string("input_415_cast_fp16")]; + string x_335_pad_type_0 = const()[name = string("x_335_pad_type_0"), val = string("valid")]; + tensor x_335_strides_0 = const()[name = string("x_335_strides_0"), val = tensor([1, 1])]; + tensor x_335_pad_0 = const()[name = string("x_335_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_335_dilations_0 = const()[name = string("x_335_dilations_0"), val = tensor([1, 1])]; + int32 x_335_groups_0 = const()[name = string("x_335_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775602048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781574080))))[name = string("model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_335_cast_fp16 = conv(dilations = x_335_dilations_0, groups = x_335_groups_0, pad = x_335_pad_0, pad_type = x_335_pad_type_0, strides = x_335_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("x_335_cast_fp16")]; + string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; + tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; + tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; + int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781795328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787767360))))[name = string("model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_41_cast_fp16 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("b_41_cast_fp16")]; + string var_14704_mode_0 = const()[name = string("op_14704_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_14704_cast_fp16 = gelu(mode = var_14704_mode_0, x = x_335_cast_fp16)[name = string("op_14704_cast_fp16")]; + tensor input_417_cast_fp16 = mul(x = var_14704_cast_fp16, y = b_41_cast_fp16)[name = string("input_417_cast_fp16")]; + string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; + tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; + tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; + int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787988608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793960640))))[name = string("model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_41_cast_fp16 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_417_cast_fp16)[name = string("e_41_cast_fp16")]; + tensor var_14712_axes_0 = const()[name = string("op_14712_axes_0"), val = tensor([2])]; + tensor var_14712_cast_fp16 = squeeze(axes = var_14712_axes_0, x = e_41_cast_fp16)[name = string("op_14712_cast_fp16")]; + tensor var_14713 = const()[name = string("op_14713"), val = tensor([0, 2, 1])]; + int32 var_14724 = const()[name = string("op_14724"), val = int32(-1)]; + fp16 const_794_promoted_to_fp16 = const()[name = string("const_794_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_337_cast_fp16 = transpose(perm = var_14713, x = var_14712_cast_fp16)[name = string("transpose_47")]; + tensor var_14726_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = const_794_promoted_to_fp16)[name = string("op_14726_cast_fp16")]; + bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; + tensor input_419_cast_fp16 = concat(axis = var_14724, interleave = input_419_interleave_0, values = (hidden_states_337_cast_fp16, var_14726_cast_fp16))[name = string("input_419_cast_fp16")]; + tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; + fp16 var_14721_to_fp16 = const()[name = string("op_14721_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_14721_to_fp16, x = input_419_cast_fp16)[name = string("normed_501_cast_fp16")]; + tensor normed_503_begin_0 = const()[name = string("normed_503_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_503_end_0 = const()[name = string("normed_503_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_503_end_mask_0 = const()[name = string("normed_503_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_503_cast_fp16 = slice_by_index(begin = normed_503_begin_0, end = normed_503_end_0, end_mask = normed_503_end_mask_0, x = normed_501_cast_fp16)[name = string("normed_503_cast_fp16")]; + tensor var_14740_to_fp16 = const()[name = string("op_14740_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793997568)))]; + tensor hidden_states_339_cast_fp16 = mul(x = normed_503_cast_fp16, y = var_14740_to_fp16)[name = string("hidden_states_339_cast_fp16")]; + tensor hidden_states_341_cast_fp16 = add(x = hidden_states_335_cast_fp16, y = hidden_states_339_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + int32 var_14791 = const()[name = string("op_14791"), val = int32(-1)]; + fp16 const_798_promoted_to_fp16 = const()[name = string("const_798_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14793_cast_fp16 = mul(x = hidden_states_341_cast_fp16, y = const_798_promoted_to_fp16)[name = string("op_14793_cast_fp16")]; + bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; + tensor input_421_cast_fp16 = concat(axis = var_14791, interleave = input_421_interleave_0, values = (hidden_states_341_cast_fp16, var_14793_cast_fp16))[name = string("input_421_cast_fp16")]; + tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; + fp16 var_14788_to_fp16 = const()[name = string("op_14788_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_14788_to_fp16, x = input_421_cast_fp16)[name = string("normed_505_cast_fp16")]; + tensor normed_507_begin_0 = const()[name = string("normed_507_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_507_end_0 = const()[name = string("normed_507_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_507_end_mask_0 = const()[name = string("normed_507_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_507_cast_fp16 = slice_by_index(begin = normed_507_begin_0, end = normed_507_end_0, end_mask = normed_507_end_mask_0, x = normed_505_cast_fp16)[name = string("normed_507_cast_fp16")]; + tensor var_14807_to_fp16 = const()[name = string("op_14807_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793999936)))]; + tensor hidden_states_343_cast_fp16 = mul(x = normed_507_cast_fp16, y = var_14807_to_fp16)[name = string("hidden_states_343_cast_fp16")]; + tensor var_14812 = const()[name = string("op_14812"), val = tensor([0, 2, 1])]; + tensor var_14815_axes_0 = const()[name = string("op_14815_axes_0"), val = tensor([2])]; + tensor var_14813_cast_fp16 = transpose(perm = var_14812, x = hidden_states_343_cast_fp16)[name = string("transpose_46")]; + tensor var_14815_cast_fp16 = expand_dims(axes = var_14815_axes_0, x = var_14813_cast_fp16)[name = string("op_14815_cast_fp16")]; + string var_14831_pad_type_0 = const()[name = string("op_14831_pad_type_0"), val = string("valid")]; + tensor var_14831_strides_0 = const()[name = string("op_14831_strides_0"), val = tensor([1, 1])]; + tensor var_14831_pad_0 = const()[name = string("op_14831_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14831_dilations_0 = const()[name = string("op_14831_dilations_0"), val = tensor([1, 1])]; + int32 var_14831_groups_0 = const()[name = string("op_14831_groups_0"), val = int32(1)]; + tensor var_14831 = conv(dilations = var_14831_dilations_0, groups = var_14831_groups_0, pad = var_14831_pad_0, pad_type = var_14831_pad_type_0, strides = var_14831_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_14815_cast_fp16)[name = string("op_14831")]; + tensor var_14836 = const()[name = string("op_14836"), val = tensor([1, 4, 1, 256])]; + tensor var_14837 = reshape(shape = var_14836, x = var_14831)[name = string("op_14837")]; + string var_14853_pad_type_0 = const()[name = string("op_14853_pad_type_0"), val = string("valid")]; + tensor var_14853_strides_0 = const()[name = string("op_14853_strides_0"), val = tensor([1, 1])]; + tensor var_14853_pad_0 = const()[name = string("op_14853_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14853_dilations_0 = const()[name = string("op_14853_dilations_0"), val = tensor([1, 1])]; + int32 var_14853_groups_0 = const()[name = string("op_14853_groups_0"), val = int32(1)]; + tensor var_14853 = conv(dilations = var_14853_dilations_0, groups = var_14853_groups_0, pad = var_14853_pad_0, pad_type = var_14853_pad_type_0, strides = var_14853_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_14815_cast_fp16)[name = string("op_14853")]; + tensor var_14858 = const()[name = string("op_14858"), val = tensor([1, 1, 1, 256])]; + tensor var_14859 = reshape(shape = var_14858, x = var_14853)[name = string("op_14859")]; + string var_14875_pad_type_0 = const()[name = string("op_14875_pad_type_0"), val = string("valid")]; + tensor var_14875_strides_0 = const()[name = string("op_14875_strides_0"), val = tensor([1, 1])]; + tensor var_14875_pad_0 = const()[name = string("op_14875_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14875_dilations_0 = const()[name = string("op_14875_dilations_0"), val = tensor([1, 1])]; + int32 var_14875_groups_0 = const()[name = string("op_14875_groups_0"), val = int32(1)]; + tensor var_14875 = conv(dilations = var_14875_dilations_0, groups = var_14875_groups_0, pad = var_14875_pad_0, pad_type = var_14875_pad_type_0, strides = var_14875_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_14815_cast_fp16)[name = string("op_14875")]; + tensor var_14880 = const()[name = string("op_14880"), val = tensor([1, 1, 1, 256])]; + tensor var_14881 = reshape(shape = var_14880, x = var_14875)[name = string("op_14881")]; + int32 var_14896 = const()[name = string("op_14896"), val = int32(-1)]; + fp16 const_802_promoted = const()[name = string("const_802_promoted"), val = fp16(-0x1p+0)]; + tensor var_14898 = mul(x = var_14837, y = const_802_promoted)[name = string("op_14898")]; + bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; + tensor input_425 = concat(axis = var_14896, interleave = input_425_interleave_0, values = (var_14837, var_14898))[name = string("input_425")]; + tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; + fp16 var_14893_to_fp16 = const()[name = string("op_14893_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_14893_to_fp16, x = input_425)[name = string("normed_509_cast_fp16")]; + tensor normed_511_begin_0 = const()[name = string("normed_511_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_511_end_0 = const()[name = string("normed_511_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_511_end_mask_0 = const()[name = string("normed_511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_511 = slice_by_index(begin = normed_511_begin_0, end = normed_511_end_0, end_mask = normed_511_end_mask_0, x = normed_509_cast_fp16)[name = string("normed_511")]; + tensor var_14912_to_fp16 = const()[name = string("op_14912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794002304)))]; + tensor q_43_cast_fp16 = mul(x = normed_511, y = var_14912_to_fp16)[name = string("q_43_cast_fp16")]; + int32 var_14923 = const()[name = string("op_14923"), val = int32(-1)]; + fp16 const_806_promoted = const()[name = string("const_806_promoted"), val = fp16(-0x1p+0)]; + tensor var_14925 = mul(x = var_14859, y = const_806_promoted)[name = string("op_14925")]; + bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; + tensor input_427 = concat(axis = var_14923, interleave = input_427_interleave_0, values = (var_14859, var_14925))[name = string("input_427")]; + tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; + fp16 var_14920_to_fp16 = const()[name = string("op_14920_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_14920_to_fp16, x = input_427)[name = string("normed_513_cast_fp16")]; + tensor normed_515_begin_0 = const()[name = string("normed_515_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_515_end_0 = const()[name = string("normed_515_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_515_end_mask_0 = const()[name = string("normed_515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_515 = slice_by_index(begin = normed_515_begin_0, end = normed_515_end_0, end_mask = normed_515_end_mask_0, x = normed_513_cast_fp16)[name = string("normed_515")]; + tensor var_14939_to_fp16 = const()[name = string("op_14939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794002880)))]; + tensor k_43_cast_fp16 = mul(x = normed_515, y = var_14939_to_fp16)[name = string("k_43_cast_fp16")]; + tensor var_14941_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14941_cast_fp16")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; + fp16 const_812_promoted_to_fp16 = const()[name = string("const_812_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14962_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_812_promoted_to_fp16)[name = string("op_14962_cast_fp16")]; + int32 var_14964 = const()[name = string("op_14964"), val = int32(-1)]; + bool var_14965_interleave_0 = const()[name = string("op_14965_interleave_0"), val = bool(false)]; + tensor var_14965_cast_fp16 = concat(axis = var_14964, interleave = var_14965_interleave_0, values = (var_14962_cast_fp16, x1_85_cast_fp16))[name = string("op_14965_cast_fp16")]; + tensor var_14966_cast_fp16 = mul(x = var_14965_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14966_cast_fp16")]; + tensor query_states_85_cast_fp16 = add(x = var_14941_cast_fp16, y = var_14966_cast_fp16)[name = string("query_states_85_cast_fp16")]; + tensor var_14969_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14969_cast_fp16")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; + fp16 const_815_promoted_to_fp16 = const()[name = string("const_815_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14990_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_815_promoted_to_fp16)[name = string("op_14990_cast_fp16")]; + int32 var_14992 = const()[name = string("op_14992"), val = int32(-1)]; + bool var_14993_interleave_0 = const()[name = string("op_14993_interleave_0"), val = bool(false)]; + tensor var_14993_cast_fp16 = concat(axis = var_14992, interleave = var_14993_interleave_0, values = (var_14990_cast_fp16, x1_87_cast_fp16))[name = string("op_14993_cast_fp16")]; + tensor var_14994_cast_fp16 = mul(x = var_14993_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14994_cast_fp16")]; + tensor key_states_85_cast_fp16 = add(x = var_14969_cast_fp16, y = var_14994_cast_fp16)[name = string("key_states_85_cast_fp16")]; + tensor key_slice_37_begin_0 = const()[name = string("key_slice_37_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor key_slice_37_end_0 = const()[name = string("key_slice_37_end_0"), val = tensor([19, 1, 512, 256])]; + tensor key_slice_37_end_mask_0 = const()[name = string("key_slice_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_37_cast_fp16 = slice_by_index(begin = key_slice_37_begin_0, end = key_slice_37_end_0, end_mask = key_slice_37_end_mask_0, x = coreml_update_state_93)[name = string("key_slice_37_cast_fp16")]; + tensor key_tail_37_begin_0 = const()[name = string("key_tail_37_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_37_end_0 = const()[name = string("key_tail_37_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_37_cast_fp16 = slice_by_index(begin = key_tail_37_begin_0, end = key_tail_37_end_0, x = key_slice_37_cast_fp16)[name = string("key_tail_37_cast_fp16")]; + int32 var_15007 = const()[name = string("op_15007"), val = int32(2)]; + bool shifted_key_37_interleave_0 = const()[name = string("shifted_key_37_interleave_0"), val = bool(false)]; + tensor shifted_key_37_cast_fp16 = concat(axis = var_15007, interleave = shifted_key_37_interleave_0, values = (key_tail_37_cast_fp16, key_states_85_cast_fp16))[name = string("shifted_key_37_cast_fp16")]; + tensor concat_96 = const()[name = string("concat_96"), val = tensor([18, 0, 0, 0])]; + tensor concat_97 = const()[name = string("concat_97"), val = tensor([19, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_37_stride_0, update = shifted_key_37_cast_fp16, x = coreml_update_state_93)[name = string("model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_94")]; + tensor value_slice_37_begin_0 = const()[name = string("value_slice_37_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor value_slice_37_end_0 = const()[name = string("value_slice_37_end_0"), val = tensor([41, 1, 512, 256])]; + tensor value_slice_37_end_mask_0 = const()[name = string("value_slice_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_37_cast_fp16 = slice_by_index(begin = value_slice_37_begin_0, end = value_slice_37_end_0, end_mask = value_slice_37_end_mask_0, x = coreml_update_state_94)[name = string("value_slice_37_cast_fp16")]; + tensor value_tail_37_begin_0 = const()[name = string("value_tail_37_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_37_end_0 = const()[name = string("value_tail_37_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_37_cast_fp16 = slice_by_index(begin = value_tail_37_begin_0, end = value_tail_37_end_0, x = value_slice_37_cast_fp16)[name = string("value_tail_37_cast_fp16")]; + int32 var_15041 = const()[name = string("op_15041"), val = int32(2)]; + bool shifted_value_37_interleave_0 = const()[name = string("shifted_value_37_interleave_0"), val = bool(false)]; + tensor shifted_value_37_cast_fp16 = concat(axis = var_15041, interleave = shifted_value_37_interleave_0, values = (value_tail_37_cast_fp16, var_14881))[name = string("shifted_value_37_cast_fp16")]; + tensor concat_98 = const()[name = string("concat_98"), val = tensor([40, 0, 0, 0])]; + tensor concat_99 = const()[name = string("concat_99"), val = tensor([41, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_38_stride_0, update = shifted_value_37_cast_fp16, x = coreml_update_state_94)[name = string("model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_95")]; + tensor var_15069_begin_0 = const()[name = string("op_15069_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_15069_end_0 = const()[name = string("op_15069_end_0"), val = tensor([19, 1, 512, 256])]; + tensor var_15069_end_mask_0 = const()[name = string("op_15069_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15069_cast_fp16 = slice_by_index(begin = var_15069_begin_0, end = var_15069_end_0, end_mask = var_15069_end_mask_0, x = coreml_update_state_95)[name = string("op_15069_cast_fp16")]; + tensor var_15076_begin_0 = const()[name = string("op_15076_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor var_15076_end_0 = const()[name = string("op_15076_end_0"), val = tensor([41, 1, 512, 256])]; + tensor var_15076_end_mask_0 = const()[name = string("op_15076_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15076_cast_fp16 = slice_by_index(begin = var_15076_begin_0, end = var_15076_end_0, end_mask = var_15076_end_mask_0, x = coreml_update_state_95)[name = string("op_15076_cast_fp16")]; + tensor var_15113 = const()[name = string("op_15113"), val = tensor([1, 4, 1, 1])]; + tensor x_341_cast_fp16 = tile(reps = var_15113, x = var_15069_cast_fp16)[name = string("x_341_cast_fp16")]; + tensor var_15133 = const()[name = string("op_15133"), val = tensor([1, 4, 1, 1])]; + tensor x_347_cast_fp16 = tile(reps = var_15133, x = var_15076_cast_fp16)[name = string("x_347_cast_fp16")]; + bool var_15160_transpose_x_1 = const()[name = string("op_15160_transpose_x_1"), val = bool(false)]; + bool var_15160_transpose_y_1 = const()[name = string("op_15160_transpose_y_1"), val = bool(true)]; + tensor var_15160 = matmul(transpose_x = var_15160_transpose_x_1, transpose_y = var_15160_transpose_y_1, x = query_states_85_cast_fp16, y = x_341_cast_fp16)[name = string("op_15160")]; + fp16 var_15161_to_fp16 = const()[name = string("op_15161_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_127_cast_fp16 = mul(x = var_15160, y = var_15161_to_fp16)[name = string("attn_weights_127_cast_fp16")]; + tensor attn_weights_129_cast_fp16 = add(x = attn_weights_127_cast_fp16, y = var_2105)[name = string("attn_weights_129_cast_fp16")]; + int32 var_15196 = const()[name = string("op_15196"), val = int32(-1)]; + tensor attn_weights_131_cast_fp16 = softmax(axis = var_15196, x = attn_weights_129_cast_fp16)[name = string("attn_weights_131_cast_fp16")]; + bool attn_output_211_transpose_x_0 = const()[name = string("attn_output_211_transpose_x_0"), val = bool(false)]; + bool attn_output_211_transpose_y_0 = const()[name = string("attn_output_211_transpose_y_0"), val = bool(false)]; + tensor attn_output_211_cast_fp16 = matmul(transpose_x = attn_output_211_transpose_x_0, transpose_y = attn_output_211_transpose_y_0, x = attn_weights_131_cast_fp16, y = x_347_cast_fp16)[name = string("attn_output_211_cast_fp16")]; + tensor var_15207_perm_0 = const()[name = string("op_15207_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_15211 = const()[name = string("op_15211"), val = tensor([1, 1, 1024])]; + tensor var_15207_cast_fp16 = transpose(perm = var_15207_perm_0, x = attn_output_211_cast_fp16)[name = string("transpose_45")]; + tensor attn_output_215_cast_fp16 = reshape(shape = var_15211, x = var_15207_cast_fp16)[name = string("attn_output_215_cast_fp16")]; + tensor var_15216 = const()[name = string("op_15216"), val = tensor([0, 2, 1])]; + string var_15232_pad_type_0 = const()[name = string("op_15232_pad_type_0"), val = string("valid")]; + int32 var_15232_groups_0 = const()[name = string("op_15232_groups_0"), val = int32(1)]; + tensor var_15232_strides_0 = const()[name = string("op_15232_strides_0"), val = tensor([1])]; + tensor var_15232_pad_0 = const()[name = string("op_15232_pad_0"), val = tensor([0, 0])]; + tensor var_15232_dilations_0 = const()[name = string("op_15232_dilations_0"), val = tensor([1])]; + tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794003456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794888256))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_15217_cast_fp16 = transpose(perm = var_15216, x = attn_output_215_cast_fp16)[name = string("transpose_44")]; + tensor var_15232_cast_fp16 = conv(dilations = var_15232_dilations_0, groups = var_15232_groups_0, pad = var_15232_pad_0, pad_type = var_15232_pad_type_0, strides = var_15232_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_15217_cast_fp16)[name = string("op_15232_cast_fp16")]; + tensor var_15236 = const()[name = string("op_15236"), val = tensor([0, 2, 1])]; + int32 var_15247 = const()[name = string("op_15247"), val = int32(-1)]; + fp16 const_824_promoted_to_fp16 = const()[name = string("const_824_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_349_cast_fp16 = transpose(perm = var_15236, x = var_15232_cast_fp16)[name = string("transpose_43")]; + tensor var_15249_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = const_824_promoted_to_fp16)[name = string("op_15249_cast_fp16")]; + bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; + tensor input_431_cast_fp16 = concat(axis = var_15247, interleave = input_431_interleave_0, values = (hidden_states_349_cast_fp16, var_15249_cast_fp16))[name = string("input_431_cast_fp16")]; + tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; + fp16 var_15244_to_fp16 = const()[name = string("op_15244_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_15244_to_fp16, x = input_431_cast_fp16)[name = string("normed_517_cast_fp16")]; + tensor normed_519_begin_0 = const()[name = string("normed_519_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_519_end_0 = const()[name = string("normed_519_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_519_end_mask_0 = const()[name = string("normed_519_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_519_cast_fp16 = slice_by_index(begin = normed_519_begin_0, end = normed_519_end_0, end_mask = normed_519_end_mask_0, x = normed_517_cast_fp16)[name = string("normed_519_cast_fp16")]; + tensor var_15263_to_fp16 = const()[name = string("op_15263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794925184)))]; + tensor attn_output_219_cast_fp16 = mul(x = normed_519_cast_fp16, y = var_15263_to_fp16)[name = string("attn_output_219_cast_fp16")]; + tensor hidden_states_351_cast_fp16 = add(x = hidden_states_341_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_351_cast_fp16")]; + int32 var_15276 = const()[name = string("op_15276"), val = int32(-1)]; + fp16 const_828_promoted_to_fp16 = const()[name = string("const_828_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15278_cast_fp16 = mul(x = hidden_states_351_cast_fp16, y = const_828_promoted_to_fp16)[name = string("op_15278_cast_fp16")]; + bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; + tensor input_433_cast_fp16 = concat(axis = var_15276, interleave = input_433_interleave_0, values = (hidden_states_351_cast_fp16, var_15278_cast_fp16))[name = string("input_433_cast_fp16")]; + tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; + fp16 var_15273_to_fp16 = const()[name = string("op_15273_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_15273_to_fp16, x = input_433_cast_fp16)[name = string("normed_521_cast_fp16")]; + tensor normed_523_begin_0 = const()[name = string("normed_523_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_523_end_0 = const()[name = string("normed_523_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_523_end_mask_0 = const()[name = string("normed_523_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_523_cast_fp16 = slice_by_index(begin = normed_523_begin_0, end = normed_523_end_0, end_mask = normed_523_end_mask_0, x = normed_521_cast_fp16)[name = string("normed_523_cast_fp16")]; + tensor var_15292_to_fp16 = const()[name = string("op_15292_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794927552)))]; + tensor x_349_cast_fp16 = mul(x = normed_523_cast_fp16, y = var_15292_to_fp16)[name = string("x_349_cast_fp16")]; + tensor var_15304 = const()[name = string("op_15304"), val = tensor([0, 2, 1])]; + tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; + tensor var_15305_cast_fp16 = transpose(perm = var_15304, x = x_349_cast_fp16)[name = string("transpose_42")]; + tensor input_435_cast_fp16 = expand_dims(axes = input_435_axes_0, x = var_15305_cast_fp16)[name = string("input_435_cast_fp16")]; + string x_351_pad_type_0 = const()[name = string("x_351_pad_type_0"), val = string("valid")]; + tensor x_351_strides_0 = const()[name = string("x_351_strides_0"), val = tensor([1, 1])]; + tensor x_351_pad_0 = const()[name = string("x_351_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_351_dilations_0 = const()[name = string("x_351_dilations_0"), val = tensor([1, 1])]; + int32 x_351_groups_0 = const()[name = string("x_351_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794929920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800901952))))[name = string("model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("x_351_cast_fp16")]; + string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; + tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; + tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; + int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(801123200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807095232))))[name = string("model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_43_cast_fp16 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("b_43_cast_fp16")]; + string var_15330_mode_0 = const()[name = string("op_15330_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_15330_cast_fp16 = gelu(mode = var_15330_mode_0, x = x_351_cast_fp16)[name = string("op_15330_cast_fp16")]; + tensor input_437_cast_fp16 = mul(x = var_15330_cast_fp16, y = b_43_cast_fp16)[name = string("input_437_cast_fp16")]; + string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; + tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; + tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; + int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807316480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813288512))))[name = string("model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_43_cast_fp16 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_437_cast_fp16)[name = string("e_43_cast_fp16")]; + tensor var_15338_axes_0 = const()[name = string("op_15338_axes_0"), val = tensor([2])]; + tensor var_15338_cast_fp16 = squeeze(axes = var_15338_axes_0, x = e_43_cast_fp16)[name = string("op_15338_cast_fp16")]; + tensor var_15339 = const()[name = string("op_15339"), val = tensor([0, 2, 1])]; + int32 var_15350 = const()[name = string("op_15350"), val = int32(-1)]; + fp16 const_832_promoted_to_fp16 = const()[name = string("const_832_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_353_cast_fp16 = transpose(perm = var_15339, x = var_15338_cast_fp16)[name = string("transpose_41")]; + tensor var_15352_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = const_832_promoted_to_fp16)[name = string("op_15352_cast_fp16")]; + bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; + tensor input_439_cast_fp16 = concat(axis = var_15350, interleave = input_439_interleave_0, values = (hidden_states_353_cast_fp16, var_15352_cast_fp16))[name = string("input_439_cast_fp16")]; + tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; + fp16 var_15347_to_fp16 = const()[name = string("op_15347_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_15347_to_fp16, x = input_439_cast_fp16)[name = string("normed_525_cast_fp16")]; + tensor normed_527_begin_0 = const()[name = string("normed_527_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_527_end_0 = const()[name = string("normed_527_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_527_end_mask_0 = const()[name = string("normed_527_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_527_cast_fp16 = slice_by_index(begin = normed_527_begin_0, end = normed_527_end_0, end_mask = normed_527_end_mask_0, x = normed_525_cast_fp16)[name = string("normed_527_cast_fp16")]; + tensor var_15366_to_fp16 = const()[name = string("op_15366_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813325440)))]; + tensor hidden_states_355_cast_fp16 = mul(x = normed_527_cast_fp16, y = var_15366_to_fp16)[name = string("hidden_states_355_cast_fp16")]; + tensor hidden_states_357_cast_fp16 = add(x = hidden_states_351_cast_fp16, y = hidden_states_355_cast_fp16)[name = string("hidden_states_357_cast_fp16")]; + int32 var_15417 = const()[name = string("op_15417"), val = int32(-1)]; + fp16 const_836_promoted_to_fp16 = const()[name = string("const_836_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15419_cast_fp16 = mul(x = hidden_states_357_cast_fp16, y = const_836_promoted_to_fp16)[name = string("op_15419_cast_fp16")]; + bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; + tensor input_441_cast_fp16 = concat(axis = var_15417, interleave = input_441_interleave_0, values = (hidden_states_357_cast_fp16, var_15419_cast_fp16))[name = string("input_441_cast_fp16")]; + tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; + fp16 var_15414_to_fp16 = const()[name = string("op_15414_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_15414_to_fp16, x = input_441_cast_fp16)[name = string("normed_529_cast_fp16")]; + tensor normed_531_begin_0 = const()[name = string("normed_531_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_531_end_0 = const()[name = string("normed_531_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_531_end_mask_0 = const()[name = string("normed_531_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_531_cast_fp16 = slice_by_index(begin = normed_531_begin_0, end = normed_531_end_0, end_mask = normed_531_end_mask_0, x = normed_529_cast_fp16)[name = string("normed_531_cast_fp16")]; + tensor var_15433_to_fp16 = const()[name = string("op_15433_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813327808)))]; + tensor hidden_states_359_cast_fp16 = mul(x = normed_531_cast_fp16, y = var_15433_to_fp16)[name = string("hidden_states_359_cast_fp16")]; + tensor var_15438 = const()[name = string("op_15438"), val = tensor([0, 2, 1])]; + tensor var_15441_axes_0 = const()[name = string("op_15441_axes_0"), val = tensor([2])]; + tensor var_15439_cast_fp16 = transpose(perm = var_15438, x = hidden_states_359_cast_fp16)[name = string("transpose_40")]; + tensor var_15441_cast_fp16 = expand_dims(axes = var_15441_axes_0, x = var_15439_cast_fp16)[name = string("op_15441_cast_fp16")]; + string var_15457_pad_type_0 = const()[name = string("op_15457_pad_type_0"), val = string("valid")]; + tensor var_15457_strides_0 = const()[name = string("op_15457_strides_0"), val = tensor([1, 1])]; + tensor var_15457_pad_0 = const()[name = string("op_15457_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15457_dilations_0 = const()[name = string("op_15457_dilations_0"), val = tensor([1, 1])]; + int32 var_15457_groups_0 = const()[name = string("op_15457_groups_0"), val = int32(1)]; + tensor var_15457 = conv(dilations = var_15457_dilations_0, groups = var_15457_groups_0, pad = var_15457_pad_0, pad_type = var_15457_pad_type_0, strides = var_15457_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_15441_cast_fp16)[name = string("op_15457")]; + tensor var_15462 = const()[name = string("op_15462"), val = tensor([1, 4, 1, 256])]; + tensor var_15463 = reshape(shape = var_15462, x = var_15457)[name = string("op_15463")]; + string var_15479_pad_type_0 = const()[name = string("op_15479_pad_type_0"), val = string("valid")]; + tensor var_15479_strides_0 = const()[name = string("op_15479_strides_0"), val = tensor([1, 1])]; + tensor var_15479_pad_0 = const()[name = string("op_15479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15479_dilations_0 = const()[name = string("op_15479_dilations_0"), val = tensor([1, 1])]; + int32 var_15479_groups_0 = const()[name = string("op_15479_groups_0"), val = int32(1)]; + tensor var_15479 = conv(dilations = var_15479_dilations_0, groups = var_15479_groups_0, pad = var_15479_pad_0, pad_type = var_15479_pad_type_0, strides = var_15479_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_15441_cast_fp16)[name = string("op_15479")]; + tensor var_15484 = const()[name = string("op_15484"), val = tensor([1, 1, 1, 256])]; + tensor var_15485 = reshape(shape = var_15484, x = var_15479)[name = string("op_15485")]; + string var_15501_pad_type_0 = const()[name = string("op_15501_pad_type_0"), val = string("valid")]; + tensor var_15501_strides_0 = const()[name = string("op_15501_strides_0"), val = tensor([1, 1])]; + tensor var_15501_pad_0 = const()[name = string("op_15501_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15501_dilations_0 = const()[name = string("op_15501_dilations_0"), val = tensor([1, 1])]; + int32 var_15501_groups_0 = const()[name = string("op_15501_groups_0"), val = int32(1)]; + tensor var_15501 = conv(dilations = var_15501_dilations_0, groups = var_15501_groups_0, pad = var_15501_pad_0, pad_type = var_15501_pad_type_0, strides = var_15501_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_15441_cast_fp16)[name = string("op_15501")]; + tensor var_15506 = const()[name = string("op_15506"), val = tensor([1, 1, 1, 256])]; + tensor var_15507 = reshape(shape = var_15506, x = var_15501)[name = string("op_15507")]; + int32 var_15522 = const()[name = string("op_15522"), val = int32(-1)]; + fp16 const_840_promoted = const()[name = string("const_840_promoted"), val = fp16(-0x1p+0)]; + tensor var_15524 = mul(x = var_15463, y = const_840_promoted)[name = string("op_15524")]; + bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; + tensor input_445 = concat(axis = var_15522, interleave = input_445_interleave_0, values = (var_15463, var_15524))[name = string("input_445")]; + tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; + fp16 var_15519_to_fp16 = const()[name = string("op_15519_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_15519_to_fp16, x = input_445)[name = string("normed_533_cast_fp16")]; + tensor normed_535_begin_0 = const()[name = string("normed_535_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_535_end_0 = const()[name = string("normed_535_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_535_end_mask_0 = const()[name = string("normed_535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_535 = slice_by_index(begin = normed_535_begin_0, end = normed_535_end_0, end_mask = normed_535_end_mask_0, x = normed_533_cast_fp16)[name = string("normed_535")]; + tensor var_15538_to_fp16 = const()[name = string("op_15538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813330176)))]; + tensor q_45_cast_fp16 = mul(x = normed_535, y = var_15538_to_fp16)[name = string("q_45_cast_fp16")]; + int32 var_15549 = const()[name = string("op_15549"), val = int32(-1)]; + fp16 const_844_promoted = const()[name = string("const_844_promoted"), val = fp16(-0x1p+0)]; + tensor var_15551 = mul(x = var_15485, y = const_844_promoted)[name = string("op_15551")]; + bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; + tensor input_447 = concat(axis = var_15549, interleave = input_447_interleave_0, values = (var_15485, var_15551))[name = string("input_447")]; + tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; + fp16 var_15546_to_fp16 = const()[name = string("op_15546_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_15546_to_fp16, x = input_447)[name = string("normed_537_cast_fp16")]; + tensor normed_539_begin_0 = const()[name = string("normed_539_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_539_end_0 = const()[name = string("normed_539_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_539_end_mask_0 = const()[name = string("normed_539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_539 = slice_by_index(begin = normed_539_begin_0, end = normed_539_end_0, end_mask = normed_539_end_mask_0, x = normed_537_cast_fp16)[name = string("normed_539")]; + tensor var_15565_to_fp16 = const()[name = string("op_15565_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813330752)))]; + tensor k_45_cast_fp16 = mul(x = normed_539, y = var_15565_to_fp16)[name = string("k_45_cast_fp16")]; + tensor var_15567_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_15567_cast_fp16")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; + fp16 const_850_promoted_to_fp16 = const()[name = string("const_850_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15588_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_850_promoted_to_fp16)[name = string("op_15588_cast_fp16")]; + int32 var_15590 = const()[name = string("op_15590"), val = int32(-1)]; + bool var_15591_interleave_0 = const()[name = string("op_15591_interleave_0"), val = bool(false)]; + tensor var_15591_cast_fp16 = concat(axis = var_15590, interleave = var_15591_interleave_0, values = (var_15588_cast_fp16, x1_89_cast_fp16))[name = string("op_15591_cast_fp16")]; + tensor var_15592_cast_fp16 = mul(x = var_15591_cast_fp16, y = sin_1_cast_fp16)[name = string("op_15592_cast_fp16")]; + tensor query_states_89_cast_fp16 = add(x = var_15567_cast_fp16, y = var_15592_cast_fp16)[name = string("query_states_89_cast_fp16")]; + tensor var_15595_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_15595_cast_fp16")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; + fp16 const_853_promoted_to_fp16 = const()[name = string("const_853_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15616_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_853_promoted_to_fp16)[name = string("op_15616_cast_fp16")]; + int32 var_15618 = const()[name = string("op_15618"), val = int32(-1)]; + bool var_15619_interleave_0 = const()[name = string("op_15619_interleave_0"), val = bool(false)]; + tensor var_15619_cast_fp16 = concat(axis = var_15618, interleave = var_15619_interleave_0, values = (var_15616_cast_fp16, x1_91_cast_fp16))[name = string("op_15619_cast_fp16")]; + tensor var_15620_cast_fp16 = mul(x = var_15619_cast_fp16, y = sin_1_cast_fp16)[name = string("op_15620_cast_fp16")]; + tensor key_states_89_cast_fp16 = add(x = var_15595_cast_fp16, y = var_15620_cast_fp16)[name = string("key_states_89_cast_fp16")]; + tensor key_slice_39_begin_0 = const()[name = string("key_slice_39_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor key_slice_39_end_0 = const()[name = string("key_slice_39_end_0"), val = tensor([20, 1, 512, 256])]; + tensor key_slice_39_end_mask_0 = const()[name = string("key_slice_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_39_cast_fp16 = slice_by_index(begin = key_slice_39_begin_0, end = key_slice_39_end_0, end_mask = key_slice_39_end_mask_0, x = coreml_update_state_95)[name = string("key_slice_39_cast_fp16")]; + tensor key_tail_39_begin_0 = const()[name = string("key_tail_39_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_39_end_0 = const()[name = string("key_tail_39_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_39_cast_fp16 = slice_by_index(begin = key_tail_39_begin_0, end = key_tail_39_end_0, x = key_slice_39_cast_fp16)[name = string("key_tail_39_cast_fp16")]; + int32 var_15633 = const()[name = string("op_15633"), val = int32(2)]; + bool shifted_key_39_interleave_0 = const()[name = string("shifted_key_39_interleave_0"), val = bool(false)]; + tensor shifted_key_39_cast_fp16 = concat(axis = var_15633, interleave = shifted_key_39_interleave_0, values = (key_tail_39_cast_fp16, key_states_89_cast_fp16))[name = string("shifted_key_39_cast_fp16")]; + tensor concat_100 = const()[name = string("concat_100"), val = tensor([19, 0, 0, 0])]; + tensor concat_101 = const()[name = string("concat_101"), val = tensor([20, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_100, begin_mask = model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0, end = concat_101, end_mask = model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_39_stride_0, update = shifted_key_39_cast_fp16, x = coreml_update_state_95)[name = string("model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_96")]; + tensor value_slice_39_begin_0 = const()[name = string("value_slice_39_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor value_slice_39_end_0 = const()[name = string("value_slice_39_end_0"), val = tensor([42, 1, 512, 256])]; + tensor value_slice_39_end_mask_0 = const()[name = string("value_slice_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_39_cast_fp16 = slice_by_index(begin = value_slice_39_begin_0, end = value_slice_39_end_0, end_mask = value_slice_39_end_mask_0, x = coreml_update_state_96)[name = string("value_slice_39_cast_fp16")]; + tensor value_tail_39_begin_0 = const()[name = string("value_tail_39_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_39_end_0 = const()[name = string("value_tail_39_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_39_cast_fp16 = slice_by_index(begin = value_tail_39_begin_0, end = value_tail_39_end_0, x = value_slice_39_cast_fp16)[name = string("value_tail_39_cast_fp16")]; + int32 var_15667 = const()[name = string("op_15667"), val = int32(2)]; + bool shifted_value_39_interleave_0 = const()[name = string("shifted_value_39_interleave_0"), val = bool(false)]; + tensor shifted_value_39_cast_fp16 = concat(axis = var_15667, interleave = shifted_value_39_interleave_0, values = (value_tail_39_cast_fp16, var_15507))[name = string("shifted_value_39_cast_fp16")]; + tensor concat_102 = const()[name = string("concat_102"), val = tensor([41, 0, 0, 0])]; + tensor concat_103 = const()[name = string("concat_103"), val = tensor([42, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_40_stride_0, update = shifted_value_39_cast_fp16, x = coreml_update_state_96)[name = string("model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_97")]; + tensor var_15695_begin_0 = const()[name = string("op_15695_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_15695_end_0 = const()[name = string("op_15695_end_0"), val = tensor([20, 1, 512, 256])]; + tensor var_15695_end_mask_0 = const()[name = string("op_15695_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15695_cast_fp16 = slice_by_index(begin = var_15695_begin_0, end = var_15695_end_0, end_mask = var_15695_end_mask_0, x = coreml_update_state_97)[name = string("op_15695_cast_fp16")]; + tensor var_15702_begin_0 = const()[name = string("op_15702_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor var_15702_end_0 = const()[name = string("op_15702_end_0"), val = tensor([42, 1, 512, 256])]; + tensor var_15702_end_mask_0 = const()[name = string("op_15702_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15702_cast_fp16 = slice_by_index(begin = var_15702_begin_0, end = var_15702_end_0, end_mask = var_15702_end_mask_0, x = coreml_update_state_97)[name = string("op_15702_cast_fp16")]; + tensor var_15739 = const()[name = string("op_15739"), val = tensor([1, 4, 1, 1])]; + tensor x_357_cast_fp16 = tile(reps = var_15739, x = var_15695_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_15759 = const()[name = string("op_15759"), val = tensor([1, 4, 1, 1])]; + tensor x_363_cast_fp16 = tile(reps = var_15759, x = var_15702_cast_fp16)[name = string("x_363_cast_fp16")]; + bool var_15786_transpose_x_1 = const()[name = string("op_15786_transpose_x_1"), val = bool(false)]; + bool var_15786_transpose_y_1 = const()[name = string("op_15786_transpose_y_1"), val = bool(true)]; + tensor var_15786 = matmul(transpose_x = var_15786_transpose_x_1, transpose_y = var_15786_transpose_y_1, x = query_states_89_cast_fp16, y = x_357_cast_fp16)[name = string("op_15786")]; + fp16 var_15787_to_fp16 = const()[name = string("op_15787_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_133_cast_fp16 = mul(x = var_15786, y = var_15787_to_fp16)[name = string("attn_weights_133_cast_fp16")]; + tensor attn_weights_135_cast_fp16 = add(x = attn_weights_133_cast_fp16, y = var_2105)[name = string("attn_weights_135_cast_fp16")]; + int32 var_15822 = const()[name = string("op_15822"), val = int32(-1)]; + tensor attn_weights_137_cast_fp16 = softmax(axis = var_15822, x = attn_weights_135_cast_fp16)[name = string("attn_weights_137_cast_fp16")]; + bool attn_output_221_transpose_x_0 = const()[name = string("attn_output_221_transpose_x_0"), val = bool(false)]; + bool attn_output_221_transpose_y_0 = const()[name = string("attn_output_221_transpose_y_0"), val = bool(false)]; + tensor attn_output_221_cast_fp16 = matmul(transpose_x = attn_output_221_transpose_x_0, transpose_y = attn_output_221_transpose_y_0, x = attn_weights_137_cast_fp16, y = x_363_cast_fp16)[name = string("attn_output_221_cast_fp16")]; + tensor var_15833_perm_0 = const()[name = string("op_15833_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_15837 = const()[name = string("op_15837"), val = tensor([1, 1, 1024])]; + tensor var_15833_cast_fp16 = transpose(perm = var_15833_perm_0, x = attn_output_221_cast_fp16)[name = string("transpose_39")]; + tensor attn_output_225_cast_fp16 = reshape(shape = var_15837, x = var_15833_cast_fp16)[name = string("attn_output_225_cast_fp16")]; + tensor var_15842 = const()[name = string("op_15842"), val = tensor([0, 2, 1])]; + string var_15858_pad_type_0 = const()[name = string("op_15858_pad_type_0"), val = string("valid")]; + int32 var_15858_groups_0 = const()[name = string("op_15858_groups_0"), val = int32(1)]; + tensor var_15858_strides_0 = const()[name = string("op_15858_strides_0"), val = tensor([1])]; + tensor var_15858_pad_0 = const()[name = string("op_15858_pad_0"), val = tensor([0, 0])]; + tensor var_15858_dilations_0 = const()[name = string("op_15858_dilations_0"), val = tensor([1])]; + tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813331328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814216128))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_15843_cast_fp16 = transpose(perm = var_15842, x = attn_output_225_cast_fp16)[name = string("transpose_38")]; + tensor var_15858_cast_fp16 = conv(dilations = var_15858_dilations_0, groups = var_15858_groups_0, pad = var_15858_pad_0, pad_type = var_15858_pad_type_0, strides = var_15858_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_15843_cast_fp16)[name = string("op_15858_cast_fp16")]; + tensor var_15862 = const()[name = string("op_15862"), val = tensor([0, 2, 1])]; + int32 var_15873 = const()[name = string("op_15873"), val = int32(-1)]; + fp16 const_862_promoted_to_fp16 = const()[name = string("const_862_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_365_cast_fp16 = transpose(perm = var_15862, x = var_15858_cast_fp16)[name = string("transpose_37")]; + tensor var_15875_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = const_862_promoted_to_fp16)[name = string("op_15875_cast_fp16")]; + bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; + tensor input_451_cast_fp16 = concat(axis = var_15873, interleave = input_451_interleave_0, values = (hidden_states_365_cast_fp16, var_15875_cast_fp16))[name = string("input_451_cast_fp16")]; + tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; + fp16 var_15870_to_fp16 = const()[name = string("op_15870_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_15870_to_fp16, x = input_451_cast_fp16)[name = string("normed_541_cast_fp16")]; + tensor normed_543_begin_0 = const()[name = string("normed_543_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_543_end_0 = const()[name = string("normed_543_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_543_end_mask_0 = const()[name = string("normed_543_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_543_cast_fp16 = slice_by_index(begin = normed_543_begin_0, end = normed_543_end_0, end_mask = normed_543_end_mask_0, x = normed_541_cast_fp16)[name = string("normed_543_cast_fp16")]; + tensor var_15889_to_fp16 = const()[name = string("op_15889_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814253056)))]; + tensor attn_output_229_cast_fp16 = mul(x = normed_543_cast_fp16, y = var_15889_to_fp16)[name = string("attn_output_229_cast_fp16")]; + tensor hidden_states_367_cast_fp16 = add(x = hidden_states_357_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + int32 var_15902 = const()[name = string("op_15902"), val = int32(-1)]; + fp16 const_866_promoted_to_fp16 = const()[name = string("const_866_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15904_cast_fp16 = mul(x = hidden_states_367_cast_fp16, y = const_866_promoted_to_fp16)[name = string("op_15904_cast_fp16")]; + bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; + tensor input_453_cast_fp16 = concat(axis = var_15902, interleave = input_453_interleave_0, values = (hidden_states_367_cast_fp16, var_15904_cast_fp16))[name = string("input_453_cast_fp16")]; + tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; + fp16 var_15899_to_fp16 = const()[name = string("op_15899_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_15899_to_fp16, x = input_453_cast_fp16)[name = string("normed_545_cast_fp16")]; + tensor normed_547_begin_0 = const()[name = string("normed_547_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_547_end_0 = const()[name = string("normed_547_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_547_end_mask_0 = const()[name = string("normed_547_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_547_cast_fp16 = slice_by_index(begin = normed_547_begin_0, end = normed_547_end_0, end_mask = normed_547_end_mask_0, x = normed_545_cast_fp16)[name = string("normed_547_cast_fp16")]; + tensor var_15918_to_fp16 = const()[name = string("op_15918_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814255424)))]; + tensor x_365_cast_fp16 = mul(x = normed_547_cast_fp16, y = var_15918_to_fp16)[name = string("x_365_cast_fp16")]; + tensor var_15930 = const()[name = string("op_15930"), val = tensor([0, 2, 1])]; + tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; + tensor var_15931_cast_fp16 = transpose(perm = var_15930, x = x_365_cast_fp16)[name = string("transpose_36")]; + tensor input_455_cast_fp16 = expand_dims(axes = input_455_axes_0, x = var_15931_cast_fp16)[name = string("input_455_cast_fp16")]; + string x_367_pad_type_0 = const()[name = string("x_367_pad_type_0"), val = string("valid")]; + tensor x_367_strides_0 = const()[name = string("x_367_strides_0"), val = tensor([1, 1])]; + tensor x_367_pad_0 = const()[name = string("x_367_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_367_dilations_0 = const()[name = string("x_367_dilations_0"), val = tensor([1, 1])]; + int32 x_367_groups_0 = const()[name = string("x_367_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814257792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820229824))))[name = string("model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("x_367_cast_fp16")]; + string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; + tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; + tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; + int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820451072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826423104))))[name = string("model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_45_cast_fp16 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("b_45_cast_fp16")]; + string var_15956_mode_0 = const()[name = string("op_15956_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_15956_cast_fp16 = gelu(mode = var_15956_mode_0, x = x_367_cast_fp16)[name = string("op_15956_cast_fp16")]; + tensor input_457_cast_fp16 = mul(x = var_15956_cast_fp16, y = b_45_cast_fp16)[name = string("input_457_cast_fp16")]; + string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; + tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; + tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; + int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826644352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832616384))))[name = string("model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_45_cast_fp16 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_457_cast_fp16)[name = string("e_45_cast_fp16")]; + tensor var_15964_axes_0 = const()[name = string("op_15964_axes_0"), val = tensor([2])]; + tensor var_15964_cast_fp16 = squeeze(axes = var_15964_axes_0, x = e_45_cast_fp16)[name = string("op_15964_cast_fp16")]; + tensor var_15965 = const()[name = string("op_15965"), val = tensor([0, 2, 1])]; + int32 var_15976 = const()[name = string("op_15976"), val = int32(-1)]; + fp16 const_870_promoted_to_fp16 = const()[name = string("const_870_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_369_cast_fp16 = transpose(perm = var_15965, x = var_15964_cast_fp16)[name = string("transpose_35")]; + tensor var_15978_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = const_870_promoted_to_fp16)[name = string("op_15978_cast_fp16")]; + bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; + tensor input_459_cast_fp16 = concat(axis = var_15976, interleave = input_459_interleave_0, values = (hidden_states_369_cast_fp16, var_15978_cast_fp16))[name = string("input_459_cast_fp16")]; + tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; + fp16 var_15973_to_fp16 = const()[name = string("op_15973_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_15973_to_fp16, x = input_459_cast_fp16)[name = string("normed_549_cast_fp16")]; + tensor normed_551_begin_0 = const()[name = string("normed_551_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_551_end_0 = const()[name = string("normed_551_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_551_end_mask_0 = const()[name = string("normed_551_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_551_cast_fp16 = slice_by_index(begin = normed_551_begin_0, end = normed_551_end_0, end_mask = normed_551_end_mask_0, x = normed_549_cast_fp16)[name = string("normed_551_cast_fp16")]; + tensor var_15992_to_fp16 = const()[name = string("op_15992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832653312)))]; + tensor hidden_states_371_cast_fp16 = mul(x = normed_551_cast_fp16, y = var_15992_to_fp16)[name = string("hidden_states_371_cast_fp16")]; + tensor hidden_states_373_cast_fp16 = add(x = hidden_states_367_cast_fp16, y = hidden_states_371_cast_fp16)[name = string("hidden_states_373_cast_fp16")]; + int32 var_16043 = const()[name = string("op_16043"), val = int32(-1)]; + fp16 const_874_promoted_to_fp16 = const()[name = string("const_874_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16045_cast_fp16 = mul(x = hidden_states_373_cast_fp16, y = const_874_promoted_to_fp16)[name = string("op_16045_cast_fp16")]; + bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; + tensor input_461_cast_fp16 = concat(axis = var_16043, interleave = input_461_interleave_0, values = (hidden_states_373_cast_fp16, var_16045_cast_fp16))[name = string("input_461_cast_fp16")]; + tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; + fp16 var_16040_to_fp16 = const()[name = string("op_16040_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_16040_to_fp16, x = input_461_cast_fp16)[name = string("normed_553_cast_fp16")]; + tensor normed_555_begin_0 = const()[name = string("normed_555_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_555_end_0 = const()[name = string("normed_555_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_555_end_mask_0 = const()[name = string("normed_555_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_555_cast_fp16 = slice_by_index(begin = normed_555_begin_0, end = normed_555_end_0, end_mask = normed_555_end_mask_0, x = normed_553_cast_fp16)[name = string("normed_555_cast_fp16")]; + tensor var_16059_to_fp16 = const()[name = string("op_16059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832655680)))]; + tensor hidden_states_375_cast_fp16 = mul(x = normed_555_cast_fp16, y = var_16059_to_fp16)[name = string("hidden_states_375_cast_fp16")]; + tensor var_16064 = const()[name = string("op_16064"), val = tensor([0, 2, 1])]; + tensor var_16067_axes_0 = const()[name = string("op_16067_axes_0"), val = tensor([2])]; + tensor var_16065_cast_fp16 = transpose(perm = var_16064, x = hidden_states_375_cast_fp16)[name = string("transpose_34")]; + tensor var_16067_cast_fp16 = expand_dims(axes = var_16067_axes_0, x = var_16065_cast_fp16)[name = string("op_16067_cast_fp16")]; + string var_16083_pad_type_0 = const()[name = string("op_16083_pad_type_0"), val = string("valid")]; + tensor var_16083_strides_0 = const()[name = string("op_16083_strides_0"), val = tensor([1, 1])]; + tensor var_16083_pad_0 = const()[name = string("op_16083_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16083_dilations_0 = const()[name = string("op_16083_dilations_0"), val = tensor([1, 1])]; + int32 var_16083_groups_0 = const()[name = string("op_16083_groups_0"), val = int32(1)]; + tensor var_16083 = conv(dilations = var_16083_dilations_0, groups = var_16083_groups_0, pad = var_16083_pad_0, pad_type = var_16083_pad_type_0, strides = var_16083_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_16067_cast_fp16)[name = string("op_16083")]; + tensor var_16088 = const()[name = string("op_16088"), val = tensor([1, 4, 1, 256])]; + tensor var_16089 = reshape(shape = var_16088, x = var_16083)[name = string("op_16089")]; + string var_16105_pad_type_0 = const()[name = string("op_16105_pad_type_0"), val = string("valid")]; + tensor var_16105_strides_0 = const()[name = string("op_16105_strides_0"), val = tensor([1, 1])]; + tensor var_16105_pad_0 = const()[name = string("op_16105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16105_dilations_0 = const()[name = string("op_16105_dilations_0"), val = tensor([1, 1])]; + int32 var_16105_groups_0 = const()[name = string("op_16105_groups_0"), val = int32(1)]; + tensor var_16105 = conv(dilations = var_16105_dilations_0, groups = var_16105_groups_0, pad = var_16105_pad_0, pad_type = var_16105_pad_type_0, strides = var_16105_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_16067_cast_fp16)[name = string("op_16105")]; + tensor var_16110 = const()[name = string("op_16110"), val = tensor([1, 1, 1, 256])]; + tensor var_16111 = reshape(shape = var_16110, x = var_16105)[name = string("op_16111")]; + string var_16127_pad_type_0 = const()[name = string("op_16127_pad_type_0"), val = string("valid")]; + tensor var_16127_strides_0 = const()[name = string("op_16127_strides_0"), val = tensor([1, 1])]; + tensor var_16127_pad_0 = const()[name = string("op_16127_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16127_dilations_0 = const()[name = string("op_16127_dilations_0"), val = tensor([1, 1])]; + int32 var_16127_groups_0 = const()[name = string("op_16127_groups_0"), val = int32(1)]; + tensor var_16127 = conv(dilations = var_16127_dilations_0, groups = var_16127_groups_0, pad = var_16127_pad_0, pad_type = var_16127_pad_type_0, strides = var_16127_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_16067_cast_fp16)[name = string("op_16127")]; + tensor var_16132 = const()[name = string("op_16132"), val = tensor([1, 1, 1, 256])]; + tensor var_16133 = reshape(shape = var_16132, x = var_16127)[name = string("op_16133")]; + int32 var_16148 = const()[name = string("op_16148"), val = int32(-1)]; + fp16 const_878_promoted = const()[name = string("const_878_promoted"), val = fp16(-0x1p+0)]; + tensor var_16150 = mul(x = var_16089, y = const_878_promoted)[name = string("op_16150")]; + bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; + tensor input_465 = concat(axis = var_16148, interleave = input_465_interleave_0, values = (var_16089, var_16150))[name = string("input_465")]; + tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; + fp16 var_16145_to_fp16 = const()[name = string("op_16145_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_16145_to_fp16, x = input_465)[name = string("normed_557_cast_fp16")]; + tensor normed_559_begin_0 = const()[name = string("normed_559_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_559_end_0 = const()[name = string("normed_559_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_559_end_mask_0 = const()[name = string("normed_559_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_559 = slice_by_index(begin = normed_559_begin_0, end = normed_559_end_0, end_mask = normed_559_end_mask_0, x = normed_557_cast_fp16)[name = string("normed_559")]; + tensor var_16164_to_fp16 = const()[name = string("op_16164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832658048)))]; + tensor q_47_cast_fp16 = mul(x = normed_559, y = var_16164_to_fp16)[name = string("q_47_cast_fp16")]; + int32 var_16175 = const()[name = string("op_16175"), val = int32(-1)]; + fp16 const_882_promoted = const()[name = string("const_882_promoted"), val = fp16(-0x1p+0)]; + tensor var_16177 = mul(x = var_16111, y = const_882_promoted)[name = string("op_16177")]; + bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; + tensor input_467 = concat(axis = var_16175, interleave = input_467_interleave_0, values = (var_16111, var_16177))[name = string("input_467")]; + tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; + fp16 var_16172_to_fp16 = const()[name = string("op_16172_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_16172_to_fp16, x = input_467)[name = string("normed_561_cast_fp16")]; + tensor normed_563_begin_0 = const()[name = string("normed_563_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_563_end_0 = const()[name = string("normed_563_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_563_end_mask_0 = const()[name = string("normed_563_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_563 = slice_by_index(begin = normed_563_begin_0, end = normed_563_end_0, end_mask = normed_563_end_mask_0, x = normed_561_cast_fp16)[name = string("normed_563")]; + tensor var_16191_to_fp16 = const()[name = string("op_16191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832658624)))]; + tensor k_47_cast_fp16 = mul(x = normed_563, y = var_16191_to_fp16)[name = string("k_47_cast_fp16")]; + tensor var_16193_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_21_cast_fp16)[name = string("op_16193_cast_fp16")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; + fp16 const_888_promoted_to_fp16 = const()[name = string("const_888_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16214_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_888_promoted_to_fp16)[name = string("op_16214_cast_fp16")]; + int32 var_16216 = const()[name = string("op_16216"), val = int32(-1)]; + bool var_16217_interleave_0 = const()[name = string("op_16217_interleave_0"), val = bool(false)]; + tensor var_16217_cast_fp16 = concat(axis = var_16216, interleave = var_16217_interleave_0, values = (var_16214_cast_fp16, x1_93_cast_fp16))[name = string("op_16217_cast_fp16")]; + tensor var_16218_cast_fp16 = mul(x = var_16217_cast_fp16, y = sin_21_cast_fp16)[name = string("op_16218_cast_fp16")]; + tensor query_states_93_cast_fp16 = add(x = var_16193_cast_fp16, y = var_16218_cast_fp16)[name = string("query_states_93_cast_fp16")]; + tensor var_16221_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_21_cast_fp16)[name = string("op_16221_cast_fp16")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; + fp16 const_891_promoted_to_fp16 = const()[name = string("const_891_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16242_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_891_promoted_to_fp16)[name = string("op_16242_cast_fp16")]; + int32 var_16244 = const()[name = string("op_16244"), val = int32(-1)]; + bool var_16245_interleave_0 = const()[name = string("op_16245_interleave_0"), val = bool(false)]; + tensor var_16245_cast_fp16 = concat(axis = var_16244, interleave = var_16245_interleave_0, values = (var_16242_cast_fp16, x1_95_cast_fp16))[name = string("op_16245_cast_fp16")]; + tensor var_16246_cast_fp16 = mul(x = var_16245_cast_fp16, y = sin_21_cast_fp16)[name = string("op_16246_cast_fp16")]; + tensor key_states_93_cast_fp16 = add(x = var_16221_cast_fp16, y = var_16246_cast_fp16)[name = string("key_states_93_cast_fp16")]; + tensor expand_dims_236 = const()[name = string("expand_dims_236"), val = tensor([3])]; + tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; + tensor expand_dims_239 = const()[name = string("expand_dims_239"), val = tensor([0])]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([4])]; + int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; + bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; + tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_236, expand_dims_237, current_pos, expand_dims_239))[name = string("concat_106")]; + tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; + tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; + int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; + bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; + tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_240, concat_107_values1_0, var_5043, concat_107_values3_0))[name = string("concat_107")]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_7_stride_0, update = key_states_93_cast_fp16, x = coreml_update_state_87)[name = string("model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_98_write_state")]; + tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_98")]; + tensor expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor([7])]; + tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; + tensor expand_dims_245 = const()[name = string("expand_dims_245"), val = tensor([0])]; + tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([8])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_242, expand_dims_243, current_pos, expand_dims_245))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_246, concat_111_values1_0, var_5043, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_8_stride_0, update = var_16133, x = coreml_update_state_98)[name = string("model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_99_write_state")]; + tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_99")]; + tensor var_16301_begin_0 = const()[name = string("op_16301_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_16301_end_0 = const()[name = string("op_16301_end_0"), val = tensor([4, 1, 4096, 256])]; + tensor var_16301_end_mask_0 = const()[name = string("op_16301_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16301_cast_fp16 = slice_by_index(begin = var_16301_begin_0, end = var_16301_end_0, end_mask = var_16301_end_mask_0, x = coreml_update_state_99)[name = string("op_16301_cast_fp16")]; + tensor var_16308_begin_0 = const()[name = string("op_16308_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_16308_end_0 = const()[name = string("op_16308_end_0"), val = tensor([1, 1, 4096, 256])]; + tensor var_16308_end_mask_0 = const()[name = string("op_16308_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_16308_cast_fp16 = slice_by_index(begin = var_16308_begin_0, end = var_16308_end_0, end_mask = var_16308_end_mask_0, x = coreml_update_state_99)[name = string("op_16308_cast_fp16")]; + tensor var_16345 = const()[name = string("op_16345"), val = tensor([1, 4, 1, 1])]; + tensor x_373_cast_fp16 = tile(reps = var_16345, x = var_16301_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_16365 = const()[name = string("op_16365"), val = tensor([1, 4, 1, 1])]; + tensor x_379_cast_fp16 = tile(reps = var_16365, x = var_16308_cast_fp16)[name = string("x_379_cast_fp16")]; + bool var_16392_transpose_x_1 = const()[name = string("op_16392_transpose_x_1"), val = bool(false)]; + bool var_16392_transpose_y_1 = const()[name = string("op_16392_transpose_y_1"), val = bool(true)]; + tensor var_16392 = matmul(transpose_x = var_16392_transpose_x_1, transpose_y = var_16392_transpose_y_1, x = query_states_93_cast_fp16, y = x_373_cast_fp16)[name = string("op_16392")]; + fp16 var_16393_to_fp16 = const()[name = string("op_16393_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_139_cast_fp16 = mul(x = var_16392, y = var_16393_to_fp16)[name = string("attn_weights_139_cast_fp16")]; + tensor attn_weights_141_cast_fp16 = add(x = attn_weights_139_cast_fp16, y = causal_mask)[name = string("attn_weights_141_cast_fp16")]; + int32 var_16428 = const()[name = string("op_16428"), val = int32(-1)]; + tensor attn_weights_143_cast_fp16 = softmax(axis = var_16428, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; + bool attn_output_231_transpose_x_0 = const()[name = string("attn_output_231_transpose_x_0"), val = bool(false)]; + bool attn_output_231_transpose_y_0 = const()[name = string("attn_output_231_transpose_y_0"), val = bool(false)]; + tensor attn_output_231_cast_fp16 = matmul(transpose_x = attn_output_231_transpose_x_0, transpose_y = attn_output_231_transpose_y_0, x = attn_weights_143_cast_fp16, y = x_379_cast_fp16)[name = string("attn_output_231_cast_fp16")]; + tensor var_16439_perm_0 = const()[name = string("op_16439_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_16443 = const()[name = string("op_16443"), val = tensor([1, 1, 1024])]; + tensor var_16439_cast_fp16 = transpose(perm = var_16439_perm_0, x = attn_output_231_cast_fp16)[name = string("transpose_33")]; + tensor attn_output_235_cast_fp16 = reshape(shape = var_16443, x = var_16439_cast_fp16)[name = string("attn_output_235_cast_fp16")]; + tensor var_16448 = const()[name = string("op_16448"), val = tensor([0, 2, 1])]; + string var_16464_pad_type_0 = const()[name = string("op_16464_pad_type_0"), val = string("valid")]; + int32 var_16464_groups_0 = const()[name = string("op_16464_groups_0"), val = int32(1)]; + tensor var_16464_strides_0 = const()[name = string("op_16464_strides_0"), val = tensor([1])]; + tensor var_16464_pad_0 = const()[name = string("op_16464_pad_0"), val = tensor([0, 0])]; + tensor var_16464_dilations_0 = const()[name = string("op_16464_dilations_0"), val = tensor([1])]; + tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832659200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833544000))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_16449_cast_fp16 = transpose(perm = var_16448, x = attn_output_235_cast_fp16)[name = string("transpose_32")]; + tensor var_16464_cast_fp16 = conv(dilations = var_16464_dilations_0, groups = var_16464_groups_0, pad = var_16464_pad_0, pad_type = var_16464_pad_type_0, strides = var_16464_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_16449_cast_fp16)[name = string("op_16464_cast_fp16")]; + tensor var_16468 = const()[name = string("op_16468"), val = tensor([0, 2, 1])]; + int32 var_16479 = const()[name = string("op_16479"), val = int32(-1)]; + fp16 const_900_promoted_to_fp16 = const()[name = string("const_900_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_381_cast_fp16 = transpose(perm = var_16468, x = var_16464_cast_fp16)[name = string("transpose_31")]; + tensor var_16481_cast_fp16 = mul(x = hidden_states_381_cast_fp16, y = const_900_promoted_to_fp16)[name = string("op_16481_cast_fp16")]; + bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; + tensor input_471_cast_fp16 = concat(axis = var_16479, interleave = input_471_interleave_0, values = (hidden_states_381_cast_fp16, var_16481_cast_fp16))[name = string("input_471_cast_fp16")]; + tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; + fp16 var_16476_to_fp16 = const()[name = string("op_16476_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_16476_to_fp16, x = input_471_cast_fp16)[name = string("normed_565_cast_fp16")]; + tensor normed_567_begin_0 = const()[name = string("normed_567_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_567_end_0 = const()[name = string("normed_567_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_567_end_mask_0 = const()[name = string("normed_567_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_567_cast_fp16 = slice_by_index(begin = normed_567_begin_0, end = normed_567_end_0, end_mask = normed_567_end_mask_0, x = normed_565_cast_fp16)[name = string("normed_567_cast_fp16")]; + tensor var_16495_to_fp16 = const()[name = string("op_16495_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833580928)))]; + tensor attn_output_239_cast_fp16 = mul(x = normed_567_cast_fp16, y = var_16495_to_fp16)[name = string("attn_output_239_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = add(x = hidden_states_373_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + int32 var_16508 = const()[name = string("op_16508"), val = int32(-1)]; + fp16 const_904_promoted_to_fp16 = const()[name = string("const_904_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16510_cast_fp16 = mul(x = hidden_states_383_cast_fp16, y = const_904_promoted_to_fp16)[name = string("op_16510_cast_fp16")]; + bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; + tensor input_473_cast_fp16 = concat(axis = var_16508, interleave = input_473_interleave_0, values = (hidden_states_383_cast_fp16, var_16510_cast_fp16))[name = string("input_473_cast_fp16")]; + tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; + fp16 var_16505_to_fp16 = const()[name = string("op_16505_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_16505_to_fp16, x = input_473_cast_fp16)[name = string("normed_569_cast_fp16")]; + tensor normed_571_begin_0 = const()[name = string("normed_571_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_571_end_0 = const()[name = string("normed_571_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_571_end_mask_0 = const()[name = string("normed_571_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_571_cast_fp16 = slice_by_index(begin = normed_571_begin_0, end = normed_571_end_0, end_mask = normed_571_end_mask_0, x = normed_569_cast_fp16)[name = string("normed_571_cast_fp16")]; + tensor var_16524_to_fp16 = const()[name = string("op_16524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833583296)))]; + tensor x_381_cast_fp16 = mul(x = normed_571_cast_fp16, y = var_16524_to_fp16)[name = string("x_381_cast_fp16")]; + tensor var_16536 = const()[name = string("op_16536"), val = tensor([0, 2, 1])]; + tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; + tensor var_16537_cast_fp16 = transpose(perm = var_16536, x = x_381_cast_fp16)[name = string("transpose_30")]; + tensor input_475_cast_fp16 = expand_dims(axes = input_475_axes_0, x = var_16537_cast_fp16)[name = string("input_475_cast_fp16")]; + string x_383_pad_type_0 = const()[name = string("x_383_pad_type_0"), val = string("valid")]; + tensor x_383_strides_0 = const()[name = string("x_383_strides_0"), val = tensor([1, 1])]; + tensor x_383_pad_0 = const()[name = string("x_383_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_383_dilations_0 = const()[name = string("x_383_dilations_0"), val = tensor([1, 1])]; + int32 x_383_groups_0 = const()[name = string("x_383_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833585664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839557696))))[name = string("model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_383_cast_fp16 = conv(dilations = x_383_dilations_0, groups = x_383_groups_0, pad = x_383_pad_0, pad_type = x_383_pad_type_0, strides = x_383_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("x_383_cast_fp16")]; + string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; + tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; + tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; + int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839778944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845750976))))[name = string("model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_47_cast_fp16 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("b_47_cast_fp16")]; + string var_16562_mode_0 = const()[name = string("op_16562_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_16562_cast_fp16 = gelu(mode = var_16562_mode_0, x = x_383_cast_fp16)[name = string("op_16562_cast_fp16")]; + tensor input_477_cast_fp16 = mul(x = var_16562_cast_fp16, y = b_47_cast_fp16)[name = string("input_477_cast_fp16")]; + string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; + tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; + tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; + int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845972224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851944256))))[name = string("model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_47_cast_fp16 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_477_cast_fp16)[name = string("e_47_cast_fp16")]; + tensor var_16570_axes_0 = const()[name = string("op_16570_axes_0"), val = tensor([2])]; + tensor var_16570_cast_fp16 = squeeze(axes = var_16570_axes_0, x = e_47_cast_fp16)[name = string("op_16570_cast_fp16")]; + tensor var_16571 = const()[name = string("op_16571"), val = tensor([0, 2, 1])]; + int32 var_16582 = const()[name = string("op_16582"), val = int32(-1)]; + fp16 const_908_promoted_to_fp16 = const()[name = string("const_908_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_385_cast_fp16 = transpose(perm = var_16571, x = var_16570_cast_fp16)[name = string("transpose_29")]; + tensor var_16584_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = const_908_promoted_to_fp16)[name = string("op_16584_cast_fp16")]; + bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; + tensor input_479_cast_fp16 = concat(axis = var_16582, interleave = input_479_interleave_0, values = (hidden_states_385_cast_fp16, var_16584_cast_fp16))[name = string("input_479_cast_fp16")]; + tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; + fp16 var_16579_to_fp16 = const()[name = string("op_16579_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_16579_to_fp16, x = input_479_cast_fp16)[name = string("normed_573_cast_fp16")]; + tensor normed_575_begin_0 = const()[name = string("normed_575_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_575_end_0 = const()[name = string("normed_575_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_575_end_mask_0 = const()[name = string("normed_575_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_575_cast_fp16 = slice_by_index(begin = normed_575_begin_0, end = normed_575_end_0, end_mask = normed_575_end_mask_0, x = normed_573_cast_fp16)[name = string("normed_575_cast_fp16")]; + tensor var_16598_to_fp16 = const()[name = string("op_16598_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851981184)))]; + tensor hidden_states_387_cast_fp16 = mul(x = normed_575_cast_fp16, y = var_16598_to_fp16)[name = string("hidden_states_387_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_383_cast_fp16, y = hidden_states_387_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + int32 var_16649 = const()[name = string("op_16649"), val = int32(-1)]; + fp16 const_912_promoted_to_fp16 = const()[name = string("const_912_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16651_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = const_912_promoted_to_fp16)[name = string("op_16651_cast_fp16")]; + bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; + tensor input_481_cast_fp16 = concat(axis = var_16649, interleave = input_481_interleave_0, values = (hidden_states_389_cast_fp16, var_16651_cast_fp16))[name = string("input_481_cast_fp16")]; + tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; + fp16 var_16646_to_fp16 = const()[name = string("op_16646_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_16646_to_fp16, x = input_481_cast_fp16)[name = string("normed_577_cast_fp16")]; + tensor normed_579_begin_0 = const()[name = string("normed_579_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_579_end_0 = const()[name = string("normed_579_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_579_end_mask_0 = const()[name = string("normed_579_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_579_cast_fp16 = slice_by_index(begin = normed_579_begin_0, end = normed_579_end_0, end_mask = normed_579_end_mask_0, x = normed_577_cast_fp16)[name = string("normed_579_cast_fp16")]; + tensor var_16665_to_fp16 = const()[name = string("op_16665_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851983552)))]; + tensor hidden_states_391_cast_fp16 = mul(x = normed_579_cast_fp16, y = var_16665_to_fp16)[name = string("hidden_states_391_cast_fp16")]; + tensor var_16670 = const()[name = string("op_16670"), val = tensor([0, 2, 1])]; + tensor var_16673_axes_0 = const()[name = string("op_16673_axes_0"), val = tensor([2])]; + tensor var_16671_cast_fp16 = transpose(perm = var_16670, x = hidden_states_391_cast_fp16)[name = string("transpose_28")]; + tensor var_16673_cast_fp16 = expand_dims(axes = var_16673_axes_0, x = var_16671_cast_fp16)[name = string("op_16673_cast_fp16")]; + string var_16689_pad_type_0 = const()[name = string("op_16689_pad_type_0"), val = string("valid")]; + tensor var_16689_strides_0 = const()[name = string("op_16689_strides_0"), val = tensor([1, 1])]; + tensor var_16689_pad_0 = const()[name = string("op_16689_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16689_dilations_0 = const()[name = string("op_16689_dilations_0"), val = tensor([1, 1])]; + int32 var_16689_groups_0 = const()[name = string("op_16689_groups_0"), val = int32(1)]; + tensor var_16689 = conv(dilations = var_16689_dilations_0, groups = var_16689_groups_0, pad = var_16689_pad_0, pad_type = var_16689_pad_type_0, strides = var_16689_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_16673_cast_fp16)[name = string("op_16689")]; + tensor var_16694 = const()[name = string("op_16694"), val = tensor([1, 4, 1, 256])]; + tensor var_16695 = reshape(shape = var_16694, x = var_16689)[name = string("op_16695")]; + string var_16711_pad_type_0 = const()[name = string("op_16711_pad_type_0"), val = string("valid")]; + tensor var_16711_strides_0 = const()[name = string("op_16711_strides_0"), val = tensor([1, 1])]; + tensor var_16711_pad_0 = const()[name = string("op_16711_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16711_dilations_0 = const()[name = string("op_16711_dilations_0"), val = tensor([1, 1])]; + int32 var_16711_groups_0 = const()[name = string("op_16711_groups_0"), val = int32(1)]; + tensor var_16711 = conv(dilations = var_16711_dilations_0, groups = var_16711_groups_0, pad = var_16711_pad_0, pad_type = var_16711_pad_type_0, strides = var_16711_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_16673_cast_fp16)[name = string("op_16711")]; + tensor var_16716 = const()[name = string("op_16716"), val = tensor([1, 1, 1, 256])]; + tensor var_16717 = reshape(shape = var_16716, x = var_16711)[name = string("op_16717")]; + string var_16733_pad_type_0 = const()[name = string("op_16733_pad_type_0"), val = string("valid")]; + tensor var_16733_strides_0 = const()[name = string("op_16733_strides_0"), val = tensor([1, 1])]; + tensor var_16733_pad_0 = const()[name = string("op_16733_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16733_dilations_0 = const()[name = string("op_16733_dilations_0"), val = tensor([1, 1])]; + int32 var_16733_groups_0 = const()[name = string("op_16733_groups_0"), val = int32(1)]; + tensor var_16733 = conv(dilations = var_16733_dilations_0, groups = var_16733_groups_0, pad = var_16733_pad_0, pad_type = var_16733_pad_type_0, strides = var_16733_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_16673_cast_fp16)[name = string("op_16733")]; + tensor var_16738 = const()[name = string("op_16738"), val = tensor([1, 1, 1, 256])]; + tensor var_16739 = reshape(shape = var_16738, x = var_16733)[name = string("op_16739")]; + int32 var_16754 = const()[name = string("op_16754"), val = int32(-1)]; + fp16 const_916_promoted = const()[name = string("const_916_promoted"), val = fp16(-0x1p+0)]; + tensor var_16756 = mul(x = var_16695, y = const_916_promoted)[name = string("op_16756")]; + bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; + tensor input_485 = concat(axis = var_16754, interleave = input_485_interleave_0, values = (var_16695, var_16756))[name = string("input_485")]; + tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; + fp16 var_16751_to_fp16 = const()[name = string("op_16751_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_16751_to_fp16, x = input_485)[name = string("normed_581_cast_fp16")]; + tensor normed_583_begin_0 = const()[name = string("normed_583_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_583_end_0 = const()[name = string("normed_583_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_583_end_mask_0 = const()[name = string("normed_583_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_583 = slice_by_index(begin = normed_583_begin_0, end = normed_583_end_0, end_mask = normed_583_end_mask_0, x = normed_581_cast_fp16)[name = string("normed_583")]; + tensor var_16770_to_fp16 = const()[name = string("op_16770_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851985920)))]; + tensor q_49_cast_fp16 = mul(x = normed_583, y = var_16770_to_fp16)[name = string("q_49_cast_fp16")]; + int32 var_16781 = const()[name = string("op_16781"), val = int32(-1)]; + fp16 const_920_promoted = const()[name = string("const_920_promoted"), val = fp16(-0x1p+0)]; + tensor var_16783 = mul(x = var_16717, y = const_920_promoted)[name = string("op_16783")]; + bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; + tensor input_487 = concat(axis = var_16781, interleave = input_487_interleave_0, values = (var_16717, var_16783))[name = string("input_487")]; + tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; + fp16 var_16778_to_fp16 = const()[name = string("op_16778_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_16778_to_fp16, x = input_487)[name = string("normed_585_cast_fp16")]; + tensor normed_587_begin_0 = const()[name = string("normed_587_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_587_end_0 = const()[name = string("normed_587_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_587_end_mask_0 = const()[name = string("normed_587_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_587 = slice_by_index(begin = normed_587_begin_0, end = normed_587_end_0, end_mask = normed_587_end_mask_0, x = normed_585_cast_fp16)[name = string("normed_587")]; + tensor var_16797_to_fp16 = const()[name = string("op_16797_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851986496)))]; + tensor k_49_cast_fp16 = mul(x = normed_587, y = var_16797_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_16799_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_16799_cast_fp16")]; + tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; + fp16 const_926_promoted_to_fp16 = const()[name = string("const_926_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16820_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_926_promoted_to_fp16)[name = string("op_16820_cast_fp16")]; + int32 var_16822 = const()[name = string("op_16822"), val = int32(-1)]; + bool var_16823_interleave_0 = const()[name = string("op_16823_interleave_0"), val = bool(false)]; + tensor var_16823_cast_fp16 = concat(axis = var_16822, interleave = var_16823_interleave_0, values = (var_16820_cast_fp16, x1_97_cast_fp16))[name = string("op_16823_cast_fp16")]; + tensor var_16824_cast_fp16 = mul(x = var_16823_cast_fp16, y = sin_1_cast_fp16)[name = string("op_16824_cast_fp16")]; + tensor query_states_97_cast_fp16 = add(x = var_16799_cast_fp16, y = var_16824_cast_fp16)[name = string("query_states_97_cast_fp16")]; + tensor var_16827_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_16827_cast_fp16")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; + fp16 const_929_promoted_to_fp16 = const()[name = string("const_929_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16848_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_929_promoted_to_fp16)[name = string("op_16848_cast_fp16")]; + int32 var_16850 = const()[name = string("op_16850"), val = int32(-1)]; + bool var_16851_interleave_0 = const()[name = string("op_16851_interleave_0"), val = bool(false)]; + tensor var_16851_cast_fp16 = concat(axis = var_16850, interleave = var_16851_interleave_0, values = (var_16848_cast_fp16, x1_99_cast_fp16))[name = string("op_16851_cast_fp16")]; + tensor var_16852_cast_fp16 = mul(x = var_16851_cast_fp16, y = sin_1_cast_fp16)[name = string("op_16852_cast_fp16")]; + tensor key_states_97_cast_fp16 = add(x = var_16827_cast_fp16, y = var_16852_cast_fp16)[name = string("key_states_97_cast_fp16")]; + tensor key_slice_41_begin_0 = const()[name = string("key_slice_41_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor key_slice_41_end_0 = const()[name = string("key_slice_41_end_0"), val = tensor([21, 1, 512, 256])]; + tensor key_slice_41_end_mask_0 = const()[name = string("key_slice_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_41_cast_fp16 = slice_by_index(begin = key_slice_41_begin_0, end = key_slice_41_end_0, end_mask = key_slice_41_end_mask_0, x = coreml_update_state_97)[name = string("key_slice_41_cast_fp16")]; + tensor key_tail_41_begin_0 = const()[name = string("key_tail_41_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_41_end_0 = const()[name = string("key_tail_41_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_41_cast_fp16 = slice_by_index(begin = key_tail_41_begin_0, end = key_tail_41_end_0, x = key_slice_41_cast_fp16)[name = string("key_tail_41_cast_fp16")]; + int32 var_16865 = const()[name = string("op_16865"), val = int32(2)]; + bool shifted_key_41_interleave_0 = const()[name = string("shifted_key_41_interleave_0"), val = bool(false)]; + tensor shifted_key_41_cast_fp16 = concat(axis = var_16865, interleave = shifted_key_41_interleave_0, values = (key_tail_41_cast_fp16, key_states_97_cast_fp16))[name = string("shifted_key_41_cast_fp16")]; + tensor concat_112 = const()[name = string("concat_112"), val = tensor([20, 0, 0, 0])]; + tensor concat_113 = const()[name = string("concat_113"), val = tensor([21, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_112, begin_mask = model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0, end = concat_113, end_mask = model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_41_stride_0, update = shifted_key_41_cast_fp16, x = coreml_update_state_97)[name = string("model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_100_write_state")]; + tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_100")]; + tensor value_slice_41_begin_0 = const()[name = string("value_slice_41_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor value_slice_41_end_0 = const()[name = string("value_slice_41_end_0"), val = tensor([43, 1, 512, 256])]; + tensor value_slice_41_end_mask_0 = const()[name = string("value_slice_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_41_cast_fp16 = slice_by_index(begin = value_slice_41_begin_0, end = value_slice_41_end_0, end_mask = value_slice_41_end_mask_0, x = coreml_update_state_100)[name = string("value_slice_41_cast_fp16")]; + tensor value_tail_41_begin_0 = const()[name = string("value_tail_41_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_41_end_0 = const()[name = string("value_tail_41_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_41_cast_fp16 = slice_by_index(begin = value_tail_41_begin_0, end = value_tail_41_end_0, x = value_slice_41_cast_fp16)[name = string("value_tail_41_cast_fp16")]; + int32 var_16899 = const()[name = string("op_16899"), val = int32(2)]; + bool shifted_value_41_interleave_0 = const()[name = string("shifted_value_41_interleave_0"), val = bool(false)]; + tensor shifted_value_41_cast_fp16 = concat(axis = var_16899, interleave = shifted_value_41_interleave_0, values = (value_tail_41_cast_fp16, var_16739))[name = string("shifted_value_41_cast_fp16")]; + tensor concat_114 = const()[name = string("concat_114"), val = tensor([42, 0, 0, 0])]; + tensor concat_115 = const()[name = string("concat_115"), val = tensor([43, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_42_stride_0, update = shifted_value_41_cast_fp16, x = coreml_update_state_100)[name = string("model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_101_write_state")]; + tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_101")]; + tensor var_16927_begin_0 = const()[name = string("op_16927_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_16927_end_0 = const()[name = string("op_16927_end_0"), val = tensor([21, 1, 512, 256])]; + tensor var_16927_end_mask_0 = const()[name = string("op_16927_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16927_cast_fp16 = slice_by_index(begin = var_16927_begin_0, end = var_16927_end_0, end_mask = var_16927_end_mask_0, x = coreml_update_state_101)[name = string("op_16927_cast_fp16")]; + tensor var_16934_begin_0 = const()[name = string("op_16934_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor var_16934_end_0 = const()[name = string("op_16934_end_0"), val = tensor([43, 1, 512, 256])]; + tensor var_16934_end_mask_0 = const()[name = string("op_16934_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16934_cast_fp16 = slice_by_index(begin = var_16934_begin_0, end = var_16934_end_0, end_mask = var_16934_end_mask_0, x = coreml_update_state_101)[name = string("op_16934_cast_fp16")]; + tensor var_16971 = const()[name = string("op_16971"), val = tensor([1, 4, 1, 1])]; + tensor x_389_cast_fp16 = tile(reps = var_16971, x = var_16927_cast_fp16)[name = string("x_389_cast_fp16")]; + tensor var_16991 = const()[name = string("op_16991"), val = tensor([1, 4, 1, 1])]; + tensor x_395_cast_fp16 = tile(reps = var_16991, x = var_16934_cast_fp16)[name = string("x_395_cast_fp16")]; + bool var_17018_transpose_x_1 = const()[name = string("op_17018_transpose_x_1"), val = bool(false)]; + bool var_17018_transpose_y_1 = const()[name = string("op_17018_transpose_y_1"), val = bool(true)]; + tensor var_17018 = matmul(transpose_x = var_17018_transpose_x_1, transpose_y = var_17018_transpose_y_1, x = query_states_97_cast_fp16, y = x_389_cast_fp16)[name = string("op_17018")]; + fp16 var_17019_to_fp16 = const()[name = string("op_17019_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_145_cast_fp16 = mul(x = var_17018, y = var_17019_to_fp16)[name = string("attn_weights_145_cast_fp16")]; + tensor attn_weights_147_cast_fp16 = add(x = attn_weights_145_cast_fp16, y = var_2105)[name = string("attn_weights_147_cast_fp16")]; + int32 var_17054 = const()[name = string("op_17054"), val = int32(-1)]; + tensor attn_weights_149_cast_fp16 = softmax(axis = var_17054, x = attn_weights_147_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; + bool attn_output_241_transpose_x_0 = const()[name = string("attn_output_241_transpose_x_0"), val = bool(false)]; + bool attn_output_241_transpose_y_0 = const()[name = string("attn_output_241_transpose_y_0"), val = bool(false)]; + tensor attn_output_241_cast_fp16 = matmul(transpose_x = attn_output_241_transpose_x_0, transpose_y = attn_output_241_transpose_y_0, x = attn_weights_149_cast_fp16, y = x_395_cast_fp16)[name = string("attn_output_241_cast_fp16")]; + tensor var_17065_perm_0 = const()[name = string("op_17065_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_17069 = const()[name = string("op_17069"), val = tensor([1, 1, 1024])]; + tensor var_17065_cast_fp16 = transpose(perm = var_17065_perm_0, x = attn_output_241_cast_fp16)[name = string("transpose_27")]; + tensor attn_output_245_cast_fp16 = reshape(shape = var_17069, x = var_17065_cast_fp16)[name = string("attn_output_245_cast_fp16")]; + tensor var_17074 = const()[name = string("op_17074"), val = tensor([0, 2, 1])]; + string var_17090_pad_type_0 = const()[name = string("op_17090_pad_type_0"), val = string("valid")]; + int32 var_17090_groups_0 = const()[name = string("op_17090_groups_0"), val = int32(1)]; + tensor var_17090_strides_0 = const()[name = string("op_17090_strides_0"), val = tensor([1])]; + tensor var_17090_pad_0 = const()[name = string("op_17090_pad_0"), val = tensor([0, 0])]; + tensor var_17090_dilations_0 = const()[name = string("op_17090_dilations_0"), val = tensor([1])]; + tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851987072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852871872))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_17075_cast_fp16 = transpose(perm = var_17074, x = attn_output_245_cast_fp16)[name = string("transpose_26")]; + tensor var_17090_cast_fp16 = conv(dilations = var_17090_dilations_0, groups = var_17090_groups_0, pad = var_17090_pad_0, pad_type = var_17090_pad_type_0, strides = var_17090_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_17075_cast_fp16)[name = string("op_17090_cast_fp16")]; + tensor var_17094 = const()[name = string("op_17094"), val = tensor([0, 2, 1])]; + int32 var_17105 = const()[name = string("op_17105"), val = int32(-1)]; + fp16 const_938_promoted_to_fp16 = const()[name = string("const_938_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_397_cast_fp16 = transpose(perm = var_17094, x = var_17090_cast_fp16)[name = string("transpose_25")]; + tensor var_17107_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = const_938_promoted_to_fp16)[name = string("op_17107_cast_fp16")]; + bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; + tensor input_491_cast_fp16 = concat(axis = var_17105, interleave = input_491_interleave_0, values = (hidden_states_397_cast_fp16, var_17107_cast_fp16))[name = string("input_491_cast_fp16")]; + tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; + fp16 var_17102_to_fp16 = const()[name = string("op_17102_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_17102_to_fp16, x = input_491_cast_fp16)[name = string("normed_589_cast_fp16")]; + tensor normed_591_begin_0 = const()[name = string("normed_591_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_591_end_0 = const()[name = string("normed_591_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_591_end_mask_0 = const()[name = string("normed_591_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_591_cast_fp16 = slice_by_index(begin = normed_591_begin_0, end = normed_591_end_0, end_mask = normed_591_end_mask_0, x = normed_589_cast_fp16)[name = string("normed_591_cast_fp16")]; + tensor var_17121_to_fp16 = const()[name = string("op_17121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852908800)))]; + tensor attn_output_249_cast_fp16 = mul(x = normed_591_cast_fp16, y = var_17121_to_fp16)[name = string("attn_output_249_cast_fp16")]; + tensor hidden_states_399_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_399_cast_fp16")]; + int32 var_17134 = const()[name = string("op_17134"), val = int32(-1)]; + fp16 const_942_promoted_to_fp16 = const()[name = string("const_942_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17136_cast_fp16 = mul(x = hidden_states_399_cast_fp16, y = const_942_promoted_to_fp16)[name = string("op_17136_cast_fp16")]; + bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; + tensor input_493_cast_fp16 = concat(axis = var_17134, interleave = input_493_interleave_0, values = (hidden_states_399_cast_fp16, var_17136_cast_fp16))[name = string("input_493_cast_fp16")]; + tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; + fp16 var_17131_to_fp16 = const()[name = string("op_17131_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_17131_to_fp16, x = input_493_cast_fp16)[name = string("normed_593_cast_fp16")]; + tensor normed_595_begin_0 = const()[name = string("normed_595_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_595_end_0 = const()[name = string("normed_595_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_595_end_mask_0 = const()[name = string("normed_595_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_595_cast_fp16 = slice_by_index(begin = normed_595_begin_0, end = normed_595_end_0, end_mask = normed_595_end_mask_0, x = normed_593_cast_fp16)[name = string("normed_595_cast_fp16")]; + tensor var_17150_to_fp16 = const()[name = string("op_17150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852911168)))]; + tensor x_397_cast_fp16 = mul(x = normed_595_cast_fp16, y = var_17150_to_fp16)[name = string("x_397_cast_fp16")]; + tensor var_17162 = const()[name = string("op_17162"), val = tensor([0, 2, 1])]; + tensor input_495_axes_0 = const()[name = string("input_495_axes_0"), val = tensor([2])]; + tensor var_17163_cast_fp16 = transpose(perm = var_17162, x = x_397_cast_fp16)[name = string("transpose_24")]; + tensor input_495_cast_fp16 = expand_dims(axes = input_495_axes_0, x = var_17163_cast_fp16)[name = string("input_495_cast_fp16")]; + string x_399_pad_type_0 = const()[name = string("x_399_pad_type_0"), val = string("valid")]; + tensor x_399_strides_0 = const()[name = string("x_399_strides_0"), val = tensor([1, 1])]; + tensor x_399_pad_0 = const()[name = string("x_399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_399_dilations_0 = const()[name = string("x_399_dilations_0"), val = tensor([1, 1])]; + int32 x_399_groups_0 = const()[name = string("x_399_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852913536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858885568))))[name = string("model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_399_cast_fp16 = conv(dilations = x_399_dilations_0, groups = x_399_groups_0, pad = x_399_pad_0, pad_type = x_399_pad_type_0, strides = x_399_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("x_399_cast_fp16")]; + string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; + tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; + tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; + int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859106816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865078848))))[name = string("model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_49_cast_fp16 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("b_49_cast_fp16")]; + string var_17188_mode_0 = const()[name = string("op_17188_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_17188_cast_fp16 = gelu(mode = var_17188_mode_0, x = x_399_cast_fp16)[name = string("op_17188_cast_fp16")]; + tensor input_497_cast_fp16 = mul(x = var_17188_cast_fp16, y = b_49_cast_fp16)[name = string("input_497_cast_fp16")]; + string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; + tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; + tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; + int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865300096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871272128))))[name = string("model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_49_cast_fp16 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_497_cast_fp16)[name = string("e_49_cast_fp16")]; + tensor var_17196_axes_0 = const()[name = string("op_17196_axes_0"), val = tensor([2])]; + tensor var_17196_cast_fp16 = squeeze(axes = var_17196_axes_0, x = e_49_cast_fp16)[name = string("op_17196_cast_fp16")]; + tensor var_17197 = const()[name = string("op_17197"), val = tensor([0, 2, 1])]; + int32 var_17208 = const()[name = string("op_17208"), val = int32(-1)]; + fp16 const_946_promoted_to_fp16 = const()[name = string("const_946_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_401_cast_fp16 = transpose(perm = var_17197, x = var_17196_cast_fp16)[name = string("transpose_23")]; + tensor var_17210_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = const_946_promoted_to_fp16)[name = string("op_17210_cast_fp16")]; + bool input_499_interleave_0 = const()[name = string("input_499_interleave_0"), val = bool(false)]; + tensor input_499_cast_fp16 = concat(axis = var_17208, interleave = input_499_interleave_0, values = (hidden_states_401_cast_fp16, var_17210_cast_fp16))[name = string("input_499_cast_fp16")]; + tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; + fp16 var_17205_to_fp16 = const()[name = string("op_17205_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_17205_to_fp16, x = input_499_cast_fp16)[name = string("normed_597_cast_fp16")]; + tensor normed_599_begin_0 = const()[name = string("normed_599_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_599_end_0 = const()[name = string("normed_599_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_599_end_mask_0 = const()[name = string("normed_599_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_599_cast_fp16 = slice_by_index(begin = normed_599_begin_0, end = normed_599_end_0, end_mask = normed_599_end_mask_0, x = normed_597_cast_fp16)[name = string("normed_599_cast_fp16")]; + tensor var_17224_to_fp16 = const()[name = string("op_17224_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871309056)))]; + tensor hidden_states_403_cast_fp16 = mul(x = normed_599_cast_fp16, y = var_17224_to_fp16)[name = string("hidden_states_403_cast_fp16")]; + tensor hidden_states_405_cast_fp16 = add(x = hidden_states_399_cast_fp16, y = hidden_states_403_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; + int32 var_17275 = const()[name = string("op_17275"), val = int32(-1)]; + fp16 const_950_promoted_to_fp16 = const()[name = string("const_950_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17277_cast_fp16 = mul(x = hidden_states_405_cast_fp16, y = const_950_promoted_to_fp16)[name = string("op_17277_cast_fp16")]; + bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; + tensor input_501_cast_fp16 = concat(axis = var_17275, interleave = input_501_interleave_0, values = (hidden_states_405_cast_fp16, var_17277_cast_fp16))[name = string("input_501_cast_fp16")]; + tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; + fp16 var_17272_to_fp16 = const()[name = string("op_17272_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_17272_to_fp16, x = input_501_cast_fp16)[name = string("normed_601_cast_fp16")]; + tensor normed_603_begin_0 = const()[name = string("normed_603_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_603_end_0 = const()[name = string("normed_603_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_603_end_mask_0 = const()[name = string("normed_603_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_603_cast_fp16 = slice_by_index(begin = normed_603_begin_0, end = normed_603_end_0, end_mask = normed_603_end_mask_0, x = normed_601_cast_fp16)[name = string("normed_603_cast_fp16")]; + tensor var_17291_to_fp16 = const()[name = string("op_17291_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871311424)))]; + tensor hidden_states_407_cast_fp16 = mul(x = normed_603_cast_fp16, y = var_17291_to_fp16)[name = string("hidden_states_407_cast_fp16")]; + tensor var_17296 = const()[name = string("op_17296"), val = tensor([0, 2, 1])]; + tensor var_17299_axes_0 = const()[name = string("op_17299_axes_0"), val = tensor([2])]; + tensor var_17297_cast_fp16 = transpose(perm = var_17296, x = hidden_states_407_cast_fp16)[name = string("transpose_22")]; + tensor var_17299_cast_fp16 = expand_dims(axes = var_17299_axes_0, x = var_17297_cast_fp16)[name = string("op_17299_cast_fp16")]; + string var_17315_pad_type_0 = const()[name = string("op_17315_pad_type_0"), val = string("valid")]; + tensor var_17315_strides_0 = const()[name = string("op_17315_strides_0"), val = tensor([1, 1])]; + tensor var_17315_pad_0 = const()[name = string("op_17315_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17315_dilations_0 = const()[name = string("op_17315_dilations_0"), val = tensor([1, 1])]; + int32 var_17315_groups_0 = const()[name = string("op_17315_groups_0"), val = int32(1)]; + tensor var_17315 = conv(dilations = var_17315_dilations_0, groups = var_17315_groups_0, pad = var_17315_pad_0, pad_type = var_17315_pad_type_0, strides = var_17315_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_17299_cast_fp16)[name = string("op_17315")]; + tensor var_17320 = const()[name = string("op_17320"), val = tensor([1, 4, 1, 256])]; + tensor var_17321 = reshape(shape = var_17320, x = var_17315)[name = string("op_17321")]; + string var_17337_pad_type_0 = const()[name = string("op_17337_pad_type_0"), val = string("valid")]; + tensor var_17337_strides_0 = const()[name = string("op_17337_strides_0"), val = tensor([1, 1])]; + tensor var_17337_pad_0 = const()[name = string("op_17337_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17337_dilations_0 = const()[name = string("op_17337_dilations_0"), val = tensor([1, 1])]; + int32 var_17337_groups_0 = const()[name = string("op_17337_groups_0"), val = int32(1)]; + tensor var_17337 = conv(dilations = var_17337_dilations_0, groups = var_17337_groups_0, pad = var_17337_pad_0, pad_type = var_17337_pad_type_0, strides = var_17337_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_17299_cast_fp16)[name = string("op_17337")]; + tensor var_17342 = const()[name = string("op_17342"), val = tensor([1, 1, 1, 256])]; + tensor var_17343 = reshape(shape = var_17342, x = var_17337)[name = string("op_17343")]; + string var_17359_pad_type_0 = const()[name = string("op_17359_pad_type_0"), val = string("valid")]; + tensor var_17359_strides_0 = const()[name = string("op_17359_strides_0"), val = tensor([1, 1])]; + tensor var_17359_pad_0 = const()[name = string("op_17359_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17359_dilations_0 = const()[name = string("op_17359_dilations_0"), val = tensor([1, 1])]; + int32 var_17359_groups_0 = const()[name = string("op_17359_groups_0"), val = int32(1)]; + tensor var_17359 = conv(dilations = var_17359_dilations_0, groups = var_17359_groups_0, pad = var_17359_pad_0, pad_type = var_17359_pad_type_0, strides = var_17359_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_17299_cast_fp16)[name = string("op_17359")]; + tensor var_17364 = const()[name = string("op_17364"), val = tensor([1, 1, 1, 256])]; + tensor var_17365 = reshape(shape = var_17364, x = var_17359)[name = string("op_17365")]; + int32 var_17380 = const()[name = string("op_17380"), val = int32(-1)]; + fp16 const_954_promoted = const()[name = string("const_954_promoted"), val = fp16(-0x1p+0)]; + tensor var_17382 = mul(x = var_17321, y = const_954_promoted)[name = string("op_17382")]; + bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; + tensor input_505 = concat(axis = var_17380, interleave = input_505_interleave_0, values = (var_17321, var_17382))[name = string("input_505")]; + tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; + fp16 var_17377_to_fp16 = const()[name = string("op_17377_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_17377_to_fp16, x = input_505)[name = string("normed_605_cast_fp16")]; + tensor normed_607_begin_0 = const()[name = string("normed_607_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_607_end_0 = const()[name = string("normed_607_end_0"), val = tensor([1, 4, 1, 256])]; + tensor normed_607_end_mask_0 = const()[name = string("normed_607_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_607 = slice_by_index(begin = normed_607_begin_0, end = normed_607_end_0, end_mask = normed_607_end_mask_0, x = normed_605_cast_fp16)[name = string("normed_607")]; + tensor var_17396_to_fp16 = const()[name = string("op_17396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871313792)))]; + tensor q_cast_fp16 = mul(x = normed_607, y = var_17396_to_fp16)[name = string("q_cast_fp16")]; + int32 var_17407 = const()[name = string("op_17407"), val = int32(-1)]; + fp16 const_958_promoted = const()[name = string("const_958_promoted"), val = fp16(-0x1p+0)]; + tensor var_17409 = mul(x = var_17343, y = const_958_promoted)[name = string("op_17409")]; + bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; + tensor input_507 = concat(axis = var_17407, interleave = input_507_interleave_0, values = (var_17343, var_17409))[name = string("input_507")]; + tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; + fp16 var_17404_to_fp16 = const()[name = string("op_17404_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_17404_to_fp16, x = input_507)[name = string("normed_609_cast_fp16")]; + tensor normed_611_begin_0 = const()[name = string("normed_611_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_611_end_0 = const()[name = string("normed_611_end_0"), val = tensor([1, 1, 1, 256])]; + tensor normed_611_end_mask_0 = const()[name = string("normed_611_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_611 = slice_by_index(begin = normed_611_begin_0, end = normed_611_end_0, end_mask = normed_611_end_mask_0, x = normed_609_cast_fp16)[name = string("normed_611")]; + tensor var_17423_to_fp16 = const()[name = string("op_17423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871314368)))]; + tensor k_cast_fp16 = mul(x = normed_611, y = var_17423_to_fp16)[name = string("k_cast_fp16")]; + tensor var_17425_cast_fp16 = mul(x = q_cast_fp16, y = cos_1_cast_fp16)[name = string("op_17425_cast_fp16")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 4, 1, 128])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_cast_fp16)[name = string("x1_101_cast_fp16")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 4, 1, 256])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_cast_fp16)[name = string("x2_101_cast_fp16")]; + fp16 const_964_promoted_to_fp16 = const()[name = string("const_964_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17446_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_964_promoted_to_fp16)[name = string("op_17446_cast_fp16")]; + int32 var_17448 = const()[name = string("op_17448"), val = int32(-1)]; + bool var_17449_interleave_0 = const()[name = string("op_17449_interleave_0"), val = bool(false)]; + tensor var_17449_cast_fp16 = concat(axis = var_17448, interleave = var_17449_interleave_0, values = (var_17446_cast_fp16, x1_101_cast_fp16))[name = string("op_17449_cast_fp16")]; + tensor var_17450_cast_fp16 = mul(x = var_17449_cast_fp16, y = sin_1_cast_fp16)[name = string("op_17450_cast_fp16")]; + tensor query_states_101_cast_fp16 = add(x = var_17425_cast_fp16, y = var_17450_cast_fp16)[name = string("query_states_101_cast_fp16")]; + tensor var_17453_cast_fp16 = mul(x = k_cast_fp16, y = cos_1_cast_fp16)[name = string("op_17453_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 1, 128])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 1, 256])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; + fp16 const_967_promoted_to_fp16 = const()[name = string("const_967_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17474_cast_fp16 = mul(x = x2_cast_fp16, y = const_967_promoted_to_fp16)[name = string("op_17474_cast_fp16")]; + int32 var_17476 = const()[name = string("op_17476"), val = int32(-1)]; + bool var_17477_interleave_0 = const()[name = string("op_17477_interleave_0"), val = bool(false)]; + tensor var_17477_cast_fp16 = concat(axis = var_17476, interleave = var_17477_interleave_0, values = (var_17474_cast_fp16, x1_cast_fp16))[name = string("op_17477_cast_fp16")]; + tensor var_17478_cast_fp16 = mul(x = var_17477_cast_fp16, y = sin_1_cast_fp16)[name = string("op_17478_cast_fp16")]; + tensor key_states_101_cast_fp16 = add(x = var_17453_cast_fp16, y = var_17478_cast_fp16)[name = string("key_states_101_cast_fp16")]; + tensor key_slice_begin_0 = const()[name = string("key_slice_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor key_slice_end_0 = const()[name = string("key_slice_end_0"), val = tensor([22, 1, 512, 256])]; + tensor key_slice_end_mask_0 = const()[name = string("key_slice_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_cast_fp16 = slice_by_index(begin = key_slice_begin_0, end = key_slice_end_0, end_mask = key_slice_end_mask_0, x = coreml_update_state_101)[name = string("key_slice_cast_fp16")]; + tensor key_tail_begin_0 = const()[name = string("key_tail_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor key_tail_end_0 = const()[name = string("key_tail_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_tail_cast_fp16 = slice_by_index(begin = key_tail_begin_0, end = key_tail_end_0, x = key_slice_cast_fp16)[name = string("key_tail_cast_fp16")]; + int32 var_17491 = const()[name = string("op_17491"), val = int32(2)]; + bool shifted_key_interleave_0 = const()[name = string("shifted_key_interleave_0"), val = bool(false)]; + tensor shifted_key_cast_fp16 = concat(axis = var_17491, interleave = shifted_key_interleave_0, values = (key_tail_cast_fp16, key_states_101_cast_fp16))[name = string("shifted_key_cast_fp16")]; + tensor concat_116 = const()[name = string("concat_116"), val = tensor([21, 0, 0, 0])]; + tensor concat_117 = const()[name = string("concat_117"), val = tensor([22, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_116, begin_mask = model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0, end = concat_117, end_mask = model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_43_stride_0, update = shifted_key_cast_fp16, x = coreml_update_state_101)[name = string("model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_102_write_state")]; + tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_102")]; + tensor value_slice_begin_0 = const()[name = string("value_slice_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor value_slice_end_0 = const()[name = string("value_slice_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_slice_end_mask_0 = const()[name = string("value_slice_end_mask_0"), val = tensor([true, true, true, true])]; + tensor value_slice_cast_fp16 = slice_by_index(begin = value_slice_begin_0, end = value_slice_end_0, end_mask = value_slice_end_mask_0, x = coreml_update_state_102)[name = string("value_slice_cast_fp16")]; + tensor value_tail_begin_0 = const()[name = string("value_tail_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor value_tail_end_0 = const()[name = string("value_tail_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_tail_cast_fp16 = slice_by_index(begin = value_tail_begin_0, end = value_tail_end_0, x = value_slice_cast_fp16)[name = string("value_tail_cast_fp16")]; + int32 var_17525 = const()[name = string("op_17525"), val = int32(2)]; + bool shifted_value_interleave_0 = const()[name = string("shifted_value_interleave_0"), val = bool(false)]; + tensor shifted_value_cast_fp16 = concat(axis = var_17525, interleave = shifted_value_interleave_0, values = (value_tail_cast_fp16, var_17365))[name = string("shifted_value_cast_fp16")]; + tensor concat_118 = const()[name = string("concat_118"), val = tensor([43, 0, 0, 0])]; + tensor concat_119 = const()[name = string("concat_119"), val = tensor([44, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_44_stride_0, update = shifted_value_cast_fp16, x = coreml_update_state_102)[name = string("model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_103_write_state")]; + tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_103")]; + tensor var_17553_begin_0 = const()[name = string("op_17553_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_17553_end_0 = const()[name = string("op_17553_end_0"), val = tensor([22, 1, 512, 256])]; + tensor var_17553_end_mask_0 = const()[name = string("op_17553_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_17553_cast_fp16 = slice_by_index(begin = var_17553_begin_0, end = var_17553_end_0, end_mask = var_17553_end_mask_0, x = coreml_update_state_103)[name = string("op_17553_cast_fp16")]; + tensor var_17560_begin_0 = const()[name = string("op_17560_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor var_17560_end_0 = const()[name = string("op_17560_end_0"), val = tensor([1, 1, 512, 256])]; + tensor var_17560_end_mask_0 = const()[name = string("op_17560_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_17560_cast_fp16 = slice_by_index(begin = var_17560_begin_0, end = var_17560_end_0, end_mask = var_17560_end_mask_0, x = coreml_update_state_103)[name = string("op_17560_cast_fp16")]; + tensor var_17597 = const()[name = string("op_17597"), val = tensor([1, 4, 1, 1])]; + tensor x_405_cast_fp16 = tile(reps = var_17597, x = var_17553_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_17617 = const()[name = string("op_17617"), val = tensor([1, 4, 1, 1])]; + tensor x_411_cast_fp16 = tile(reps = var_17617, x = var_17560_cast_fp16)[name = string("x_411_cast_fp16")]; + bool var_17644_transpose_x_1 = const()[name = string("op_17644_transpose_x_1"), val = bool(false)]; + bool var_17644_transpose_y_1 = const()[name = string("op_17644_transpose_y_1"), val = bool(true)]; + tensor var_17644 = matmul(transpose_x = var_17644_transpose_x_1, transpose_y = var_17644_transpose_y_1, x = query_states_101_cast_fp16, y = x_405_cast_fp16)[name = string("op_17644")]; + fp16 var_17645_to_fp16 = const()[name = string("op_17645_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_151_cast_fp16 = mul(x = var_17644, y = var_17645_to_fp16)[name = string("attn_weights_151_cast_fp16")]; + tensor attn_weights_153_cast_fp16 = add(x = attn_weights_151_cast_fp16, y = var_2105)[name = string("attn_weights_153_cast_fp16")]; + int32 var_17680 = const()[name = string("op_17680"), val = int32(-1)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_17680, x = attn_weights_153_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_251_transpose_x_0 = const()[name = string("attn_output_251_transpose_x_0"), val = bool(false)]; + bool attn_output_251_transpose_y_0 = const()[name = string("attn_output_251_transpose_y_0"), val = bool(false)]; + tensor attn_output_251_cast_fp16 = matmul(transpose_x = attn_output_251_transpose_x_0, transpose_y = attn_output_251_transpose_y_0, x = attn_weights_cast_fp16, y = x_411_cast_fp16)[name = string("attn_output_251_cast_fp16")]; + tensor var_17691_perm_0 = const()[name = string("op_17691_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_17695 = const()[name = string("op_17695"), val = tensor([1, 1, 1024])]; + tensor var_17691_cast_fp16 = transpose(perm = var_17691_perm_0, x = attn_output_251_cast_fp16)[name = string("transpose_21")]; + tensor attn_output_255_cast_fp16 = reshape(shape = var_17695, x = var_17691_cast_fp16)[name = string("attn_output_255_cast_fp16")]; + tensor var_17700 = const()[name = string("op_17700"), val = tensor([0, 2, 1])]; + string var_17716_pad_type_0 = const()[name = string("op_17716_pad_type_0"), val = string("valid")]; + int32 var_17716_groups_0 = const()[name = string("op_17716_groups_0"), val = int32(1)]; + tensor var_17716_strides_0 = const()[name = string("op_17716_strides_0"), val = tensor([1])]; + tensor var_17716_pad_0 = const()[name = string("op_17716_pad_0"), val = tensor([0, 0])]; + tensor var_17716_dilations_0 = const()[name = string("op_17716_dilations_0"), val = tensor([1])]; + tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871314944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872199744))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_17701_cast_fp16 = transpose(perm = var_17700, x = attn_output_255_cast_fp16)[name = string("transpose_20")]; + tensor var_17716_cast_fp16 = conv(dilations = var_17716_dilations_0, groups = var_17716_groups_0, pad = var_17716_pad_0, pad_type = var_17716_pad_type_0, strides = var_17716_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_17701_cast_fp16)[name = string("op_17716_cast_fp16")]; + tensor var_17720 = const()[name = string("op_17720"), val = tensor([0, 2, 1])]; + int32 var_17731 = const()[name = string("op_17731"), val = int32(-1)]; + fp16 const_976_promoted_to_fp16 = const()[name = string("const_976_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_413_cast_fp16 = transpose(perm = var_17720, x = var_17716_cast_fp16)[name = string("transpose_19")]; + tensor var_17733_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = const_976_promoted_to_fp16)[name = string("op_17733_cast_fp16")]; + bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; + tensor input_511_cast_fp16 = concat(axis = var_17731, interleave = input_511_interleave_0, values = (hidden_states_413_cast_fp16, var_17733_cast_fp16))[name = string("input_511_cast_fp16")]; + tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; + fp16 var_17728_to_fp16 = const()[name = string("op_17728_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_17728_to_fp16, x = input_511_cast_fp16)[name = string("normed_613_cast_fp16")]; + tensor normed_615_begin_0 = const()[name = string("normed_615_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_615_end_0 = const()[name = string("normed_615_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_615_end_mask_0 = const()[name = string("normed_615_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_615_cast_fp16 = slice_by_index(begin = normed_615_begin_0, end = normed_615_end_0, end_mask = normed_615_end_mask_0, x = normed_613_cast_fp16)[name = string("normed_615_cast_fp16")]; + tensor var_17747_to_fp16 = const()[name = string("op_17747_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872236672)))]; + tensor attn_output_cast_fp16 = mul(x = normed_615_cast_fp16, y = var_17747_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor hidden_states_415_cast_fp16 = add(x = hidden_states_405_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_415_cast_fp16")]; + int32 var_17760 = const()[name = string("op_17760"), val = int32(-1)]; + fp16 const_980_promoted_to_fp16 = const()[name = string("const_980_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17762_cast_fp16 = mul(x = hidden_states_415_cast_fp16, y = const_980_promoted_to_fp16)[name = string("op_17762_cast_fp16")]; + bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; + tensor input_513_cast_fp16 = concat(axis = var_17760, interleave = input_513_interleave_0, values = (hidden_states_415_cast_fp16, var_17762_cast_fp16))[name = string("input_513_cast_fp16")]; + tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; + fp16 var_17757_to_fp16 = const()[name = string("op_17757_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_17757_to_fp16, x = input_513_cast_fp16)[name = string("normed_617_cast_fp16")]; + tensor normed_619_begin_0 = const()[name = string("normed_619_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_619_end_0 = const()[name = string("normed_619_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_619_end_mask_0 = const()[name = string("normed_619_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_619_cast_fp16 = slice_by_index(begin = normed_619_begin_0, end = normed_619_end_0, end_mask = normed_619_end_mask_0, x = normed_617_cast_fp16)[name = string("normed_619_cast_fp16")]; + tensor var_17776_to_fp16 = const()[name = string("op_17776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872239040)))]; + tensor x_413_cast_fp16 = mul(x = normed_619_cast_fp16, y = var_17776_to_fp16)[name = string("x_413_cast_fp16")]; + tensor var_17788 = const()[name = string("op_17788"), val = tensor([0, 2, 1])]; + tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; + tensor var_17789_cast_fp16 = transpose(perm = var_17788, x = x_413_cast_fp16)[name = string("transpose_18")]; + tensor input_515_cast_fp16 = expand_dims(axes = input_515_axes_0, x = var_17789_cast_fp16)[name = string("input_515_cast_fp16")]; + string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; + tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; + tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; + int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872241408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878213440))))[name = string("model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_cast_fp16 = conv(dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("x_cast_fp16")]; + string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; + tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; + tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; + int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878434688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884406720))))[name = string("model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_cast_fp16 = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("b_cast_fp16")]; + string var_17814_mode_0 = const()[name = string("op_17814_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_17814_cast_fp16 = gelu(mode = var_17814_mode_0, x = x_cast_fp16)[name = string("op_17814_cast_fp16")]; + tensor input_517_cast_fp16 = mul(x = var_17814_cast_fp16, y = b_cast_fp16)[name = string("input_517_cast_fp16")]; + string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; + tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; + tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; + int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884627968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890600000))))[name = string("model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_cast_fp16 = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("e_cast_fp16")]; + tensor var_17822_axes_0 = const()[name = string("op_17822_axes_0"), val = tensor([2])]; + tensor var_17822_cast_fp16 = squeeze(axes = var_17822_axes_0, x = e_cast_fp16)[name = string("op_17822_cast_fp16")]; + tensor var_17823 = const()[name = string("op_17823"), val = tensor([0, 2, 1])]; + int32 var_17834 = const()[name = string("op_17834"), val = int32(-1)]; + fp16 const_984_promoted_to_fp16 = const()[name = string("const_984_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_417_cast_fp16 = transpose(perm = var_17823, x = var_17822_cast_fp16)[name = string("transpose_17")]; + tensor var_17836_cast_fp16 = mul(x = hidden_states_417_cast_fp16, y = const_984_promoted_to_fp16)[name = string("op_17836_cast_fp16")]; + bool input_519_interleave_0 = const()[name = string("input_519_interleave_0"), val = bool(false)]; + tensor input_519_cast_fp16 = concat(axis = var_17834, interleave = input_519_interleave_0, values = (hidden_states_417_cast_fp16, var_17836_cast_fp16))[name = string("input_519_cast_fp16")]; + tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; + fp16 var_17831_to_fp16 = const()[name = string("op_17831_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_17831_to_fp16, x = input_519_cast_fp16)[name = string("normed_621_cast_fp16")]; + tensor normed_623_begin_0 = const()[name = string("normed_623_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_623_end_0 = const()[name = string("normed_623_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_623_end_mask_0 = const()[name = string("normed_623_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_623_cast_fp16 = slice_by_index(begin = normed_623_begin_0, end = normed_623_end_0, end_mask = normed_623_end_mask_0, x = normed_621_cast_fp16)[name = string("normed_623_cast_fp16")]; + tensor var_17850_to_fp16 = const()[name = string("op_17850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890636928)))]; + tensor hidden_states_419_cast_fp16 = mul(x = normed_623_cast_fp16, y = var_17850_to_fp16)[name = string("hidden_states_419_cast_fp16")]; + tensor hidden_states_421_cast_fp16 = add(x = hidden_states_415_cast_fp16, y = hidden_states_419_cast_fp16)[name = string("hidden_states_421_cast_fp16")]; + int32 var_17863 = const()[name = string("op_17863"), val = int32(-1)]; + fp16 const_988_promoted_to_fp16 = const()[name = string("const_988_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17865_cast_fp16 = mul(x = hidden_states_421_cast_fp16, y = const_988_promoted_to_fp16)[name = string("op_17865_cast_fp16")]; + bool input_521_interleave_0 = const()[name = string("input_521_interleave_0"), val = bool(false)]; + tensor input_521_cast_fp16 = concat(axis = var_17863, interleave = input_521_interleave_0, values = (hidden_states_421_cast_fp16, var_17865_cast_fp16))[name = string("input_521_cast_fp16")]; + tensor normed_625_axes_0 = const()[name = string("normed_625_axes_0"), val = tensor([-1])]; + fp16 var_17860_to_fp16 = const()[name = string("op_17860_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_625_cast_fp16 = layer_norm(axes = normed_625_axes_0, epsilon = var_17860_to_fp16, x = input_521_cast_fp16)[name = string("normed_625_cast_fp16")]; + tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 1, 1152])]; + tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_625_cast_fp16)[name = string("normed_cast_fp16")]; + tensor var_17879_to_fp16 = const()[name = string("op_17879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890639296)))]; + tensor hidden_states_cast_fp16 = mul(x = normed_cast_fp16, y = var_17879_to_fp16)[name = string("hidden_states_cast_fp16")]; + tensor var_17884 = const()[name = string("op_17884"), val = tensor([0, 2, 1])]; + tensor input_axes_0 = const()[name = string("input_axes_0"), val = tensor([2])]; + tensor var_17885_cast_fp16 = transpose(perm = var_17884, x = hidden_states_cast_fp16)[name = string("transpose_16")]; + tensor input_cast_fp16 = expand_dims(axes = input_axes_0, x = var_17885_cast_fp16)[name = string("input_cast_fp16")]; + string var_17898_pad_type_0 = const()[name = string("op_17898_pad_type_0"), val = string("valid")]; + tensor var_17898_strides_0 = const()[name = string("op_17898_strides_0"), val = tensor([1, 1])]; + tensor var_17898_pad_0 = const()[name = string("op_17898_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17898_dilations_0 = const()[name = string("op_17898_dilations_0"), val = tensor([1, 1])]; + int32 var_17898_groups_0 = const()[name = string("op_17898_groups_0"), val = int32(1)]; + tensor model_lm_head16_1_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890641664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(904797504))))[name = string("model_lm_head16_1_weight_promoted_to_fp16_palettized")]; + tensor var_17898_cast_fp16 = conv(dilations = var_17898_dilations_0, groups = var_17898_groups_0, pad = var_17898_pad_0, pad_type = var_17898_pad_type_0, strides = var_17898_strides_0, weight = model_lm_head16_1_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17898_cast_fp16")]; + tensor var_17900_axes_0 = const()[name = string("op_17900_axes_0"), val = tensor([2])]; + tensor var_17900_cast_fp16 = squeeze(axes = var_17900_axes_0, x = var_17898_cast_fp16)[name = string("op_17900_cast_fp16")]; + tensor logits_1_perm_0 = const()[name = string("logits_1_perm_0"), val = tensor([0, 2, 1])]; + string var_17914_pad_type_0 = const()[name = string("op_17914_pad_type_0"), val = string("valid")]; + tensor var_17914_strides_0 = const()[name = string("op_17914_strides_0"), val = tensor([1, 1])]; + tensor var_17914_pad_0 = const()[name = string("op_17914_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17914_dilations_0 = const()[name = string("op_17914_dilations_0"), val = tensor([1, 1])]; + int32 var_17914_groups_0 = const()[name = string("op_17914_groups_0"), val = int32(1)]; + tensor model_lm_head16_2_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905321856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(919477696))))[name = string("model_lm_head16_2_weight_promoted_to_fp16_palettized")]; + tensor var_17914_cast_fp16 = conv(dilations = var_17914_dilations_0, groups = var_17914_groups_0, pad = var_17914_pad_0, pad_type = var_17914_pad_type_0, strides = var_17914_strides_0, weight = model_lm_head16_2_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17914_cast_fp16")]; + tensor var_17916_axes_0 = const()[name = string("op_17916_axes_0"), val = tensor([2])]; + tensor var_17916_cast_fp16 = squeeze(axes = var_17916_axes_0, x = var_17914_cast_fp16)[name = string("op_17916_cast_fp16")]; + tensor logits_3_perm_0 = const()[name = string("logits_3_perm_0"), val = tensor([0, 2, 1])]; + string var_17930_pad_type_0 = const()[name = string("op_17930_pad_type_0"), val = string("valid")]; + tensor var_17930_strides_0 = const()[name = string("op_17930_strides_0"), val = tensor([1, 1])]; + tensor var_17930_pad_0 = const()[name = string("op_17930_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17930_dilations_0 = const()[name = string("op_17930_dilations_0"), val = tensor([1, 1])]; + int32 var_17930_groups_0 = const()[name = string("op_17930_groups_0"), val = int32(1)]; + tensor model_lm_head16_3_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920002048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934157888))))[name = string("model_lm_head16_3_weight_promoted_to_fp16_palettized")]; + tensor var_17930_cast_fp16 = conv(dilations = var_17930_dilations_0, groups = var_17930_groups_0, pad = var_17930_pad_0, pad_type = var_17930_pad_type_0, strides = var_17930_strides_0, weight = model_lm_head16_3_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17930_cast_fp16")]; + tensor var_17932_axes_0 = const()[name = string("op_17932_axes_0"), val = tensor([2])]; + tensor var_17932_cast_fp16 = squeeze(axes = var_17932_axes_0, x = var_17930_cast_fp16)[name = string("op_17932_cast_fp16")]; + tensor logits_5_perm_0 = const()[name = string("logits_5_perm_0"), val = tensor([0, 2, 1])]; + string var_17946_pad_type_0 = const()[name = string("op_17946_pad_type_0"), val = string("valid")]; + tensor var_17946_strides_0 = const()[name = string("op_17946_strides_0"), val = tensor([1, 1])]; + tensor var_17946_pad_0 = const()[name = string("op_17946_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17946_dilations_0 = const()[name = string("op_17946_dilations_0"), val = tensor([1, 1])]; + int32 var_17946_groups_0 = const()[name = string("op_17946_groups_0"), val = int32(1)]; + tensor model_lm_head16_4_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934682240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948838080))))[name = string("model_lm_head16_4_weight_promoted_to_fp16_palettized")]; + tensor var_17946_cast_fp16 = conv(dilations = var_17946_dilations_0, groups = var_17946_groups_0, pad = var_17946_pad_0, pad_type = var_17946_pad_type_0, strides = var_17946_strides_0, weight = model_lm_head16_4_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17946_cast_fp16")]; + tensor var_17948_axes_0 = const()[name = string("op_17948_axes_0"), val = tensor([2])]; + tensor var_17948_cast_fp16 = squeeze(axes = var_17948_axes_0, x = var_17946_cast_fp16)[name = string("op_17948_cast_fp16")]; + tensor logits_7_perm_0 = const()[name = string("logits_7_perm_0"), val = tensor([0, 2, 1])]; + string var_17962_pad_type_0 = const()[name = string("op_17962_pad_type_0"), val = string("valid")]; + tensor var_17962_strides_0 = const()[name = string("op_17962_strides_0"), val = tensor([1, 1])]; + tensor var_17962_pad_0 = const()[name = string("op_17962_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17962_dilations_0 = const()[name = string("op_17962_dilations_0"), val = tensor([1, 1])]; + int32 var_17962_groups_0 = const()[name = string("op_17962_groups_0"), val = int32(1)]; + tensor model_lm_head16_5_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949362432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(963518272))))[name = string("model_lm_head16_5_weight_promoted_to_fp16_palettized")]; + tensor var_17962_cast_fp16 = conv(dilations = var_17962_dilations_0, groups = var_17962_groups_0, pad = var_17962_pad_0, pad_type = var_17962_pad_type_0, strides = var_17962_strides_0, weight = model_lm_head16_5_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17962_cast_fp16")]; + tensor var_17964_axes_0 = const()[name = string("op_17964_axes_0"), val = tensor([2])]; + tensor var_17964_cast_fp16 = squeeze(axes = var_17964_axes_0, x = var_17962_cast_fp16)[name = string("op_17964_cast_fp16")]; + tensor logits_9_perm_0 = const()[name = string("logits_9_perm_0"), val = tensor([0, 2, 1])]; + string var_17978_pad_type_0 = const()[name = string("op_17978_pad_type_0"), val = string("valid")]; + tensor var_17978_strides_0 = const()[name = string("op_17978_strides_0"), val = tensor([1, 1])]; + tensor var_17978_pad_0 = const()[name = string("op_17978_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17978_dilations_0 = const()[name = string("op_17978_dilations_0"), val = tensor([1, 1])]; + int32 var_17978_groups_0 = const()[name = string("op_17978_groups_0"), val = int32(1)]; + tensor model_lm_head16_6_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964042624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978198464))))[name = string("model_lm_head16_6_weight_promoted_to_fp16_palettized")]; + tensor var_17978_cast_fp16 = conv(dilations = var_17978_dilations_0, groups = var_17978_groups_0, pad = var_17978_pad_0, pad_type = var_17978_pad_type_0, strides = var_17978_strides_0, weight = model_lm_head16_6_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17978_cast_fp16")]; + tensor var_17980_axes_0 = const()[name = string("op_17980_axes_0"), val = tensor([2])]; + tensor var_17980_cast_fp16 = squeeze(axes = var_17980_axes_0, x = var_17978_cast_fp16)[name = string("op_17980_cast_fp16")]; + tensor logits_11_perm_0 = const()[name = string("logits_11_perm_0"), val = tensor([0, 2, 1])]; + string var_17994_pad_type_0 = const()[name = string("op_17994_pad_type_0"), val = string("valid")]; + tensor var_17994_strides_0 = const()[name = string("op_17994_strides_0"), val = tensor([1, 1])]; + tensor var_17994_pad_0 = const()[name = string("op_17994_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17994_dilations_0 = const()[name = string("op_17994_dilations_0"), val = tensor([1, 1])]; + int32 var_17994_groups_0 = const()[name = string("op_17994_groups_0"), val = int32(1)]; + tensor model_lm_head16_7_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978722816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(992878656))))[name = string("model_lm_head16_7_weight_promoted_to_fp16_palettized")]; + tensor var_17994_cast_fp16 = conv(dilations = var_17994_dilations_0, groups = var_17994_groups_0, pad = var_17994_pad_0, pad_type = var_17994_pad_type_0, strides = var_17994_strides_0, weight = model_lm_head16_7_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_17994_cast_fp16")]; + tensor var_17996_axes_0 = const()[name = string("op_17996_axes_0"), val = tensor([2])]; + tensor var_17996_cast_fp16 = squeeze(axes = var_17996_axes_0, x = var_17994_cast_fp16)[name = string("op_17996_cast_fp16")]; + tensor logits_13_perm_0 = const()[name = string("logits_13_perm_0"), val = tensor([0, 2, 1])]; + string var_18010_pad_type_0 = const()[name = string("op_18010_pad_type_0"), val = string("valid")]; + tensor var_18010_strides_0 = const()[name = string("op_18010_strides_0"), val = tensor([1, 1])]; + tensor var_18010_pad_0 = const()[name = string("op_18010_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18010_dilations_0 = const()[name = string("op_18010_dilations_0"), val = tensor([1, 1])]; + int32 var_18010_groups_0 = const()[name = string("op_18010_groups_0"), val = int32(1)]; + tensor model_lm_head16_8_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(993403008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1007558848))))[name = string("model_lm_head16_8_weight_promoted_to_fp16_palettized")]; + tensor var_18010_cast_fp16 = conv(dilations = var_18010_dilations_0, groups = var_18010_groups_0, pad = var_18010_pad_0, pad_type = var_18010_pad_type_0, strides = var_18010_strides_0, weight = model_lm_head16_8_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18010_cast_fp16")]; + tensor var_18012_axes_0 = const()[name = string("op_18012_axes_0"), val = tensor([2])]; + tensor var_18012_cast_fp16 = squeeze(axes = var_18012_axes_0, x = var_18010_cast_fp16)[name = string("op_18012_cast_fp16")]; + tensor logits_15_perm_0 = const()[name = string("logits_15_perm_0"), val = tensor([0, 2, 1])]; + string var_18026_pad_type_0 = const()[name = string("op_18026_pad_type_0"), val = string("valid")]; + tensor var_18026_strides_0 = const()[name = string("op_18026_strides_0"), val = tensor([1, 1])]; + tensor var_18026_pad_0 = const()[name = string("op_18026_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18026_dilations_0 = const()[name = string("op_18026_dilations_0"), val = tensor([1, 1])]; + int32 var_18026_groups_0 = const()[name = string("op_18026_groups_0"), val = int32(1)]; + tensor model_lm_head16_9_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1008083200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1022239040))))[name = string("model_lm_head16_9_weight_promoted_to_fp16_palettized")]; + tensor var_18026_cast_fp16 = conv(dilations = var_18026_dilations_0, groups = var_18026_groups_0, pad = var_18026_pad_0, pad_type = var_18026_pad_type_0, strides = var_18026_strides_0, weight = model_lm_head16_9_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18026_cast_fp16")]; + tensor var_18028_axes_0 = const()[name = string("op_18028_axes_0"), val = tensor([2])]; + tensor var_18028_cast_fp16 = squeeze(axes = var_18028_axes_0, x = var_18026_cast_fp16)[name = string("op_18028_cast_fp16")]; + tensor logits_17_perm_0 = const()[name = string("logits_17_perm_0"), val = tensor([0, 2, 1])]; + string var_18042_pad_type_0 = const()[name = string("op_18042_pad_type_0"), val = string("valid")]; + tensor var_18042_strides_0 = const()[name = string("op_18042_strides_0"), val = tensor([1, 1])]; + tensor var_18042_pad_0 = const()[name = string("op_18042_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18042_dilations_0 = const()[name = string("op_18042_dilations_0"), val = tensor([1, 1])]; + int32 var_18042_groups_0 = const()[name = string("op_18042_groups_0"), val = int32(1)]; + tensor model_lm_head16_10_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1022763392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036919232))))[name = string("model_lm_head16_10_weight_promoted_to_fp16_palettized")]; + tensor var_18042_cast_fp16 = conv(dilations = var_18042_dilations_0, groups = var_18042_groups_0, pad = var_18042_pad_0, pad_type = var_18042_pad_type_0, strides = var_18042_strides_0, weight = model_lm_head16_10_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18042_cast_fp16")]; + tensor var_18044_axes_0 = const()[name = string("op_18044_axes_0"), val = tensor([2])]; + tensor var_18044_cast_fp16 = squeeze(axes = var_18044_axes_0, x = var_18042_cast_fp16)[name = string("op_18044_cast_fp16")]; + tensor logits_19_perm_0 = const()[name = string("logits_19_perm_0"), val = tensor([0, 2, 1])]; + string var_18058_pad_type_0 = const()[name = string("op_18058_pad_type_0"), val = string("valid")]; + tensor var_18058_strides_0 = const()[name = string("op_18058_strides_0"), val = tensor([1, 1])]; + tensor var_18058_pad_0 = const()[name = string("op_18058_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18058_dilations_0 = const()[name = string("op_18058_dilations_0"), val = tensor([1, 1])]; + int32 var_18058_groups_0 = const()[name = string("op_18058_groups_0"), val = int32(1)]; + tensor model_lm_head16_11_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1037443584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1051599424))))[name = string("model_lm_head16_11_weight_promoted_to_fp16_palettized")]; + tensor var_18058_cast_fp16 = conv(dilations = var_18058_dilations_0, groups = var_18058_groups_0, pad = var_18058_pad_0, pad_type = var_18058_pad_type_0, strides = var_18058_strides_0, weight = model_lm_head16_11_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18058_cast_fp16")]; + tensor var_18060_axes_0 = const()[name = string("op_18060_axes_0"), val = tensor([2])]; + tensor var_18060_cast_fp16 = squeeze(axes = var_18060_axes_0, x = var_18058_cast_fp16)[name = string("op_18060_cast_fp16")]; + tensor logits_21_perm_0 = const()[name = string("logits_21_perm_0"), val = tensor([0, 2, 1])]; + string var_18074_pad_type_0 = const()[name = string("op_18074_pad_type_0"), val = string("valid")]; + tensor var_18074_strides_0 = const()[name = string("op_18074_strides_0"), val = tensor([1, 1])]; + tensor var_18074_pad_0 = const()[name = string("op_18074_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18074_dilations_0 = const()[name = string("op_18074_dilations_0"), val = tensor([1, 1])]; + int32 var_18074_groups_0 = const()[name = string("op_18074_groups_0"), val = int32(1)]; + tensor model_lm_head16_12_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052123776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066279616))))[name = string("model_lm_head16_12_weight_promoted_to_fp16_palettized")]; + tensor var_18074_cast_fp16 = conv(dilations = var_18074_dilations_0, groups = var_18074_groups_0, pad = var_18074_pad_0, pad_type = var_18074_pad_type_0, strides = var_18074_strides_0, weight = model_lm_head16_12_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18074_cast_fp16")]; + tensor var_18076_axes_0 = const()[name = string("op_18076_axes_0"), val = tensor([2])]; + tensor var_18076_cast_fp16 = squeeze(axes = var_18076_axes_0, x = var_18074_cast_fp16)[name = string("op_18076_cast_fp16")]; + tensor logits_23_perm_0 = const()[name = string("logits_23_perm_0"), val = tensor([0, 2, 1])]; + string var_18090_pad_type_0 = const()[name = string("op_18090_pad_type_0"), val = string("valid")]; + tensor var_18090_strides_0 = const()[name = string("op_18090_strides_0"), val = tensor([1, 1])]; + tensor var_18090_pad_0 = const()[name = string("op_18090_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18090_dilations_0 = const()[name = string("op_18090_dilations_0"), val = tensor([1, 1])]; + int32 var_18090_groups_0 = const()[name = string("op_18090_groups_0"), val = int32(1)]; + tensor model_lm_head16_13_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066803968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080959808))))[name = string("model_lm_head16_13_weight_promoted_to_fp16_palettized")]; + tensor var_18090_cast_fp16 = conv(dilations = var_18090_dilations_0, groups = var_18090_groups_0, pad = var_18090_pad_0, pad_type = var_18090_pad_type_0, strides = var_18090_strides_0, weight = model_lm_head16_13_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18090_cast_fp16")]; + tensor var_18092_axes_0 = const()[name = string("op_18092_axes_0"), val = tensor([2])]; + tensor var_18092_cast_fp16 = squeeze(axes = var_18092_axes_0, x = var_18090_cast_fp16)[name = string("op_18092_cast_fp16")]; + tensor logits_25_perm_0 = const()[name = string("logits_25_perm_0"), val = tensor([0, 2, 1])]; + string var_18106_pad_type_0 = const()[name = string("op_18106_pad_type_0"), val = string("valid")]; + tensor var_18106_strides_0 = const()[name = string("op_18106_strides_0"), val = tensor([1, 1])]; + tensor var_18106_pad_0 = const()[name = string("op_18106_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18106_dilations_0 = const()[name = string("op_18106_dilations_0"), val = tensor([1, 1])]; + int32 var_18106_groups_0 = const()[name = string("op_18106_groups_0"), val = int32(1)]; + tensor model_lm_head16_14_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1081484160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1095640000))))[name = string("model_lm_head16_14_weight_promoted_to_fp16_palettized")]; + tensor var_18106_cast_fp16 = conv(dilations = var_18106_dilations_0, groups = var_18106_groups_0, pad = var_18106_pad_0, pad_type = var_18106_pad_type_0, strides = var_18106_strides_0, weight = model_lm_head16_14_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18106_cast_fp16")]; + tensor var_18108_axes_0 = const()[name = string("op_18108_axes_0"), val = tensor([2])]; + tensor var_18108_cast_fp16 = squeeze(axes = var_18108_axes_0, x = var_18106_cast_fp16)[name = string("op_18108_cast_fp16")]; + tensor logits_27_perm_0 = const()[name = string("logits_27_perm_0"), val = tensor([0, 2, 1])]; + string var_18122_pad_type_0 = const()[name = string("op_18122_pad_type_0"), val = string("valid")]; + tensor var_18122_strides_0 = const()[name = string("op_18122_strides_0"), val = tensor([1, 1])]; + tensor var_18122_pad_0 = const()[name = string("op_18122_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18122_dilations_0 = const()[name = string("op_18122_dilations_0"), val = tensor([1, 1])]; + int32 var_18122_groups_0 = const()[name = string("op_18122_groups_0"), val = int32(1)]; + tensor model_lm_head16_15_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096164352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110320192))))[name = string("model_lm_head16_15_weight_promoted_to_fp16_palettized")]; + tensor var_18122_cast_fp16 = conv(dilations = var_18122_dilations_0, groups = var_18122_groups_0, pad = var_18122_pad_0, pad_type = var_18122_pad_type_0, strides = var_18122_strides_0, weight = model_lm_head16_15_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18122_cast_fp16")]; + tensor var_18124_axes_0 = const()[name = string("op_18124_axes_0"), val = tensor([2])]; + tensor var_18124_cast_fp16 = squeeze(axes = var_18124_axes_0, x = var_18122_cast_fp16)[name = string("op_18124_cast_fp16")]; + tensor logits_29_perm_0 = const()[name = string("logits_29_perm_0"), val = tensor([0, 2, 1])]; + string var_18138_pad_type_0 = const()[name = string("op_18138_pad_type_0"), val = string("valid")]; + tensor var_18138_strides_0 = const()[name = string("op_18138_strides_0"), val = tensor([1, 1])]; + tensor var_18138_pad_0 = const()[name = string("op_18138_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18138_dilations_0 = const()[name = string("op_18138_dilations_0"), val = tensor([1, 1])]; + int32 var_18138_groups_0 = const()[name = string("op_18138_groups_0"), val = int32(1)]; + tensor model_lm_head16_16_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110844544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1125000384))))[name = string("model_lm_head16_16_weight_promoted_to_fp16_palettized")]; + tensor var_18138_cast_fp16 = conv(dilations = var_18138_dilations_0, groups = var_18138_groups_0, pad = var_18138_pad_0, pad_type = var_18138_pad_type_0, strides = var_18138_strides_0, weight = model_lm_head16_16_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_18138_cast_fp16")]; + tensor var_18140_axes_0 = const()[name = string("op_18140_axes_0"), val = tensor([2])]; + tensor var_18140_cast_fp16 = squeeze(axes = var_18140_axes_0, x = var_18138_cast_fp16)[name = string("op_18140_cast_fp16")]; + tensor logits_perm_0 = const()[name = string("logits_perm_0"), val = tensor([0, 2, 1])]; + int32 chunk_argmax_1_axis_0 = const()[name = string("chunk_argmax_1_axis_0"), val = int32(-1)]; + bool chunk_argmax_1_keep_dims_0 = const()[name = string("chunk_argmax_1_keep_dims_0"), val = bool(true)]; + string chunk_argmax_1_output_dtype_0 = const()[name = string("chunk_argmax_1_output_dtype_0"), val = string("int32")]; + tensor logits_1_cast_fp16 = transpose(perm = logits_1_perm_0, x = var_17900_cast_fp16)[name = string("transpose_15")]; + tensor chunk_argmax_1_cast_fp16 = reduce_argmax(axis = chunk_argmax_1_axis_0, keep_dims = chunk_argmax_1_keep_dims_0, output_dtype = chunk_argmax_1_output_dtype_0, x = logits_1_cast_fp16)[name = string("chunk_argmax_1_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = logits_1_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + int32 chunk_argmax_3_axis_0 = const()[name = string("chunk_argmax_3_axis_0"), val = int32(-1)]; + bool chunk_argmax_3_keep_dims_0 = const()[name = string("chunk_argmax_3_keep_dims_0"), val = bool(true)]; + string chunk_argmax_3_output_dtype_0 = const()[name = string("chunk_argmax_3_output_dtype_0"), val = string("int32")]; + tensor logits_3_cast_fp16 = transpose(perm = logits_3_perm_0, x = var_17916_cast_fp16)[name = string("transpose_14")]; + tensor chunk_argmax_3_cast_fp16 = reduce_argmax(axis = chunk_argmax_3_axis_0, keep_dims = chunk_argmax_3_keep_dims_0, output_dtype = chunk_argmax_3_output_dtype_0, x = logits_3_cast_fp16)[name = string("chunk_argmax_3_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = logits_3_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + int32 chunk_argmax_5_axis_0 = const()[name = string("chunk_argmax_5_axis_0"), val = int32(-1)]; + bool chunk_argmax_5_keep_dims_0 = const()[name = string("chunk_argmax_5_keep_dims_0"), val = bool(true)]; + string chunk_argmax_5_output_dtype_0 = const()[name = string("chunk_argmax_5_output_dtype_0"), val = string("int32")]; + tensor logits_5_cast_fp16 = transpose(perm = logits_5_perm_0, x = var_17932_cast_fp16)[name = string("transpose_13")]; + tensor chunk_argmax_5_cast_fp16 = reduce_argmax(axis = chunk_argmax_5_axis_0, keep_dims = chunk_argmax_5_keep_dims_0, output_dtype = chunk_argmax_5_output_dtype_0, x = logits_5_cast_fp16)[name = string("chunk_argmax_5_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = logits_5_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + int32 chunk_argmax_7_axis_0 = const()[name = string("chunk_argmax_7_axis_0"), val = int32(-1)]; + bool chunk_argmax_7_keep_dims_0 = const()[name = string("chunk_argmax_7_keep_dims_0"), val = bool(true)]; + string chunk_argmax_7_output_dtype_0 = const()[name = string("chunk_argmax_7_output_dtype_0"), val = string("int32")]; + tensor logits_7_cast_fp16 = transpose(perm = logits_7_perm_0, x = var_17948_cast_fp16)[name = string("transpose_12")]; + tensor chunk_argmax_7_cast_fp16 = reduce_argmax(axis = chunk_argmax_7_axis_0, keep_dims = chunk_argmax_7_keep_dims_0, output_dtype = chunk_argmax_7_output_dtype_0, x = logits_7_cast_fp16)[name = string("chunk_argmax_7_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = logits_7_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + int32 chunk_argmax_9_axis_0 = const()[name = string("chunk_argmax_9_axis_0"), val = int32(-1)]; + bool chunk_argmax_9_keep_dims_0 = const()[name = string("chunk_argmax_9_keep_dims_0"), val = bool(true)]; + string chunk_argmax_9_output_dtype_0 = const()[name = string("chunk_argmax_9_output_dtype_0"), val = string("int32")]; + tensor logits_9_cast_fp16 = transpose(perm = logits_9_perm_0, x = var_17964_cast_fp16)[name = string("transpose_11")]; + tensor chunk_argmax_9_cast_fp16 = reduce_argmax(axis = chunk_argmax_9_axis_0, keep_dims = chunk_argmax_9_keep_dims_0, output_dtype = chunk_argmax_9_output_dtype_0, x = logits_9_cast_fp16)[name = string("chunk_argmax_9_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = logits_9_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + int32 chunk_argmax_11_axis_0 = const()[name = string("chunk_argmax_11_axis_0"), val = int32(-1)]; + bool chunk_argmax_11_keep_dims_0 = const()[name = string("chunk_argmax_11_keep_dims_0"), val = bool(true)]; + string chunk_argmax_11_output_dtype_0 = const()[name = string("chunk_argmax_11_output_dtype_0"), val = string("int32")]; + tensor logits_11_cast_fp16 = transpose(perm = logits_11_perm_0, x = var_17980_cast_fp16)[name = string("transpose_10")]; + tensor chunk_argmax_11_cast_fp16 = reduce_argmax(axis = chunk_argmax_11_axis_0, keep_dims = chunk_argmax_11_keep_dims_0, output_dtype = chunk_argmax_11_output_dtype_0, x = logits_11_cast_fp16)[name = string("chunk_argmax_11_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = logits_11_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + int32 chunk_argmax_13_axis_0 = const()[name = string("chunk_argmax_13_axis_0"), val = int32(-1)]; + bool chunk_argmax_13_keep_dims_0 = const()[name = string("chunk_argmax_13_keep_dims_0"), val = bool(true)]; + string chunk_argmax_13_output_dtype_0 = const()[name = string("chunk_argmax_13_output_dtype_0"), val = string("int32")]; + tensor logits_13_cast_fp16 = transpose(perm = logits_13_perm_0, x = var_17996_cast_fp16)[name = string("transpose_9")]; + tensor chunk_argmax_13_cast_fp16 = reduce_argmax(axis = chunk_argmax_13_axis_0, keep_dims = chunk_argmax_13_keep_dims_0, output_dtype = chunk_argmax_13_output_dtype_0, x = logits_13_cast_fp16)[name = string("chunk_argmax_13_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = logits_13_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + int32 chunk_argmax_15_axis_0 = const()[name = string("chunk_argmax_15_axis_0"), val = int32(-1)]; + bool chunk_argmax_15_keep_dims_0 = const()[name = string("chunk_argmax_15_keep_dims_0"), val = bool(true)]; + string chunk_argmax_15_output_dtype_0 = const()[name = string("chunk_argmax_15_output_dtype_0"), val = string("int32")]; + tensor logits_15_cast_fp16 = transpose(perm = logits_15_perm_0, x = var_18012_cast_fp16)[name = string("transpose_8")]; + tensor chunk_argmax_15_cast_fp16 = reduce_argmax(axis = chunk_argmax_15_axis_0, keep_dims = chunk_argmax_15_keep_dims_0, output_dtype = chunk_argmax_15_output_dtype_0, x = logits_15_cast_fp16)[name = string("chunk_argmax_15_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = logits_15_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + int32 chunk_argmax_17_axis_0 = const()[name = string("chunk_argmax_17_axis_0"), val = int32(-1)]; + bool chunk_argmax_17_keep_dims_0 = const()[name = string("chunk_argmax_17_keep_dims_0"), val = bool(true)]; + string chunk_argmax_17_output_dtype_0 = const()[name = string("chunk_argmax_17_output_dtype_0"), val = string("int32")]; + tensor logits_17_cast_fp16 = transpose(perm = logits_17_perm_0, x = var_18028_cast_fp16)[name = string("transpose_7")]; + tensor chunk_argmax_17_cast_fp16 = reduce_argmax(axis = chunk_argmax_17_axis_0, keep_dims = chunk_argmax_17_keep_dims_0, output_dtype = chunk_argmax_17_output_dtype_0, x = logits_17_cast_fp16)[name = string("chunk_argmax_17_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8_cast_fp16 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = logits_17_cast_fp16)[name = string("reduce_max_8_cast_fp16")]; + int32 chunk_argmax_19_axis_0 = const()[name = string("chunk_argmax_19_axis_0"), val = int32(-1)]; + bool chunk_argmax_19_keep_dims_0 = const()[name = string("chunk_argmax_19_keep_dims_0"), val = bool(true)]; + string chunk_argmax_19_output_dtype_0 = const()[name = string("chunk_argmax_19_output_dtype_0"), val = string("int32")]; + tensor logits_19_cast_fp16 = transpose(perm = logits_19_perm_0, x = var_18044_cast_fp16)[name = string("transpose_6")]; + tensor chunk_argmax_19_cast_fp16 = reduce_argmax(axis = chunk_argmax_19_axis_0, keep_dims = chunk_argmax_19_keep_dims_0, output_dtype = chunk_argmax_19_output_dtype_0, x = logits_19_cast_fp16)[name = string("chunk_argmax_19_cast_fp16")]; + tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; + bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; + tensor reduce_max_9_cast_fp16 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = logits_19_cast_fp16)[name = string("reduce_max_9_cast_fp16")]; + int32 chunk_argmax_21_axis_0 = const()[name = string("chunk_argmax_21_axis_0"), val = int32(-1)]; + bool chunk_argmax_21_keep_dims_0 = const()[name = string("chunk_argmax_21_keep_dims_0"), val = bool(true)]; + string chunk_argmax_21_output_dtype_0 = const()[name = string("chunk_argmax_21_output_dtype_0"), val = string("int32")]; + tensor logits_21_cast_fp16 = transpose(perm = logits_21_perm_0, x = var_18060_cast_fp16)[name = string("transpose_5")]; + tensor chunk_argmax_21_cast_fp16 = reduce_argmax(axis = chunk_argmax_21_axis_0, keep_dims = chunk_argmax_21_keep_dims_0, output_dtype = chunk_argmax_21_output_dtype_0, x = logits_21_cast_fp16)[name = string("chunk_argmax_21_cast_fp16")]; + tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; + bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; + tensor reduce_max_10_cast_fp16 = reduce_max(axes = reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = logits_21_cast_fp16)[name = string("reduce_max_10_cast_fp16")]; + int32 chunk_argmax_23_axis_0 = const()[name = string("chunk_argmax_23_axis_0"), val = int32(-1)]; + bool chunk_argmax_23_keep_dims_0 = const()[name = string("chunk_argmax_23_keep_dims_0"), val = bool(true)]; + string chunk_argmax_23_output_dtype_0 = const()[name = string("chunk_argmax_23_output_dtype_0"), val = string("int32")]; + tensor logits_23_cast_fp16 = transpose(perm = logits_23_perm_0, x = var_18076_cast_fp16)[name = string("transpose_4")]; + tensor chunk_argmax_23_cast_fp16 = reduce_argmax(axis = chunk_argmax_23_axis_0, keep_dims = chunk_argmax_23_keep_dims_0, output_dtype = chunk_argmax_23_output_dtype_0, x = logits_23_cast_fp16)[name = string("chunk_argmax_23_cast_fp16")]; + tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; + bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; + tensor reduce_max_11_cast_fp16 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = logits_23_cast_fp16)[name = string("reduce_max_11_cast_fp16")]; + int32 chunk_argmax_25_axis_0 = const()[name = string("chunk_argmax_25_axis_0"), val = int32(-1)]; + bool chunk_argmax_25_keep_dims_0 = const()[name = string("chunk_argmax_25_keep_dims_0"), val = bool(true)]; + string chunk_argmax_25_output_dtype_0 = const()[name = string("chunk_argmax_25_output_dtype_0"), val = string("int32")]; + tensor logits_25_cast_fp16 = transpose(perm = logits_25_perm_0, x = var_18092_cast_fp16)[name = string("transpose_3")]; + tensor chunk_argmax_25_cast_fp16 = reduce_argmax(axis = chunk_argmax_25_axis_0, keep_dims = chunk_argmax_25_keep_dims_0, output_dtype = chunk_argmax_25_output_dtype_0, x = logits_25_cast_fp16)[name = string("chunk_argmax_25_cast_fp16")]; + tensor reduce_max_12_axes_0 = const()[name = string("reduce_max_12_axes_0"), val = tensor([-1])]; + bool reduce_max_12_keep_dims_0 = const()[name = string("reduce_max_12_keep_dims_0"), val = bool(true)]; + tensor reduce_max_12_cast_fp16 = reduce_max(axes = reduce_max_12_axes_0, keep_dims = reduce_max_12_keep_dims_0, x = logits_25_cast_fp16)[name = string("reduce_max_12_cast_fp16")]; + int32 chunk_argmax_27_axis_0 = const()[name = string("chunk_argmax_27_axis_0"), val = int32(-1)]; + bool chunk_argmax_27_keep_dims_0 = const()[name = string("chunk_argmax_27_keep_dims_0"), val = bool(true)]; + string chunk_argmax_27_output_dtype_0 = const()[name = string("chunk_argmax_27_output_dtype_0"), val = string("int32")]; + tensor logits_27_cast_fp16 = transpose(perm = logits_27_perm_0, x = var_18108_cast_fp16)[name = string("transpose_2")]; + tensor chunk_argmax_27_cast_fp16 = reduce_argmax(axis = chunk_argmax_27_axis_0, keep_dims = chunk_argmax_27_keep_dims_0, output_dtype = chunk_argmax_27_output_dtype_0, x = logits_27_cast_fp16)[name = string("chunk_argmax_27_cast_fp16")]; + tensor reduce_max_13_axes_0 = const()[name = string("reduce_max_13_axes_0"), val = tensor([-1])]; + bool reduce_max_13_keep_dims_0 = const()[name = string("reduce_max_13_keep_dims_0"), val = bool(true)]; + tensor reduce_max_13_cast_fp16 = reduce_max(axes = reduce_max_13_axes_0, keep_dims = reduce_max_13_keep_dims_0, x = logits_27_cast_fp16)[name = string("reduce_max_13_cast_fp16")]; + int32 chunk_argmax_29_axis_0 = const()[name = string("chunk_argmax_29_axis_0"), val = int32(-1)]; + bool chunk_argmax_29_keep_dims_0 = const()[name = string("chunk_argmax_29_keep_dims_0"), val = bool(true)]; + string chunk_argmax_29_output_dtype_0 = const()[name = string("chunk_argmax_29_output_dtype_0"), val = string("int32")]; + tensor logits_29_cast_fp16 = transpose(perm = logits_29_perm_0, x = var_18124_cast_fp16)[name = string("transpose_1")]; + tensor chunk_argmax_29_cast_fp16 = reduce_argmax(axis = chunk_argmax_29_axis_0, keep_dims = chunk_argmax_29_keep_dims_0, output_dtype = chunk_argmax_29_output_dtype_0, x = logits_29_cast_fp16)[name = string("chunk_argmax_29_cast_fp16")]; + tensor reduce_max_14_axes_0 = const()[name = string("reduce_max_14_axes_0"), val = tensor([-1])]; + bool reduce_max_14_keep_dims_0 = const()[name = string("reduce_max_14_keep_dims_0"), val = bool(true)]; + tensor reduce_max_14_cast_fp16 = reduce_max(axes = reduce_max_14_axes_0, keep_dims = reduce_max_14_keep_dims_0, x = logits_29_cast_fp16)[name = string("reduce_max_14_cast_fp16")]; + int32 chunk_argmax_axis_0 = const()[name = string("chunk_argmax_axis_0"), val = int32(-1)]; + bool chunk_argmax_keep_dims_0 = const()[name = string("chunk_argmax_keep_dims_0"), val = bool(true)]; + string chunk_argmax_output_dtype_0 = const()[name = string("chunk_argmax_output_dtype_0"), val = string("int32")]; + tensor logits_cast_fp16 = transpose(perm = logits_perm_0, x = var_18140_cast_fp16)[name = string("transpose_0")]; + tensor chunk_argmax_cast_fp16 = reduce_argmax(axis = chunk_argmax_axis_0, keep_dims = chunk_argmax_keep_dims_0, output_dtype = chunk_argmax_output_dtype_0, x = logits_cast_fp16)[name = string("chunk_argmax_cast_fp16")]; + tensor reduce_max_15_axes_0 = const()[name = string("reduce_max_15_axes_0"), val = tensor([-1])]; + bool reduce_max_15_keep_dims_0 = const()[name = string("reduce_max_15_keep_dims_0"), val = bool(true)]; + tensor reduce_max_15_cast_fp16 = reduce_max(axes = reduce_max_15_axes_0, keep_dims = reduce_max_15_keep_dims_0, x = logits_cast_fp16)[name = string("reduce_max_15_cast_fp16")]; + int32 var_18337 = const()[name = string("op_18337"), val = int32(-1)]; + bool var_18338_interleave_0 = const()[name = string("op_18338_interleave_0"), val = bool(false)]; + tensor var_18338 = concat(axis = var_18337, interleave = var_18338_interleave_0, values = (chunk_argmax_1_cast_fp16, chunk_argmax_3_cast_fp16, chunk_argmax_5_cast_fp16, chunk_argmax_7_cast_fp16, chunk_argmax_9_cast_fp16, chunk_argmax_11_cast_fp16, chunk_argmax_13_cast_fp16, chunk_argmax_15_cast_fp16, chunk_argmax_17_cast_fp16, chunk_argmax_19_cast_fp16, chunk_argmax_21_cast_fp16, chunk_argmax_23_cast_fp16, chunk_argmax_25_cast_fp16, chunk_argmax_27_cast_fp16, chunk_argmax_29_cast_fp16, chunk_argmax_cast_fp16))[name = string("op_18338")]; + tensor var_18340_axes_0 = const()[name = string("op_18340_axes_0"), val = tensor([0])]; + string var_18338_to_uint16_dtype_0 = const()[name = string("op_18338_to_uint16_dtype_0"), val = string("uint16")]; + tensor var_18338_to_uint16 = cast(dtype = var_18338_to_uint16_dtype_0, x = var_18338)[name = string("cast_1")]; + tensor var_18340_cast_uint16 = squeeze(axes = var_18340_axes_0, x = var_18338_to_uint16)[name = string("op_18340_cast_uint16")]; + tensor var_18342_axes_0 = const()[name = string("op_18342_axes_0"), val = tensor([0])]; + tensor var_18342_cast_uint16 = squeeze(axes = var_18342_axes_0, x = var_18340_cast_uint16)[name = string("op_18342_cast_uint16")]; + string var_18342_cast_uint16_to_int32_dtype_0 = const()[name = string("op_18342_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 var_18344 = const()[name = string("op_18344"), val = int32(-1)]; + bool var_18345_interleave_0 = const()[name = string("op_18345_interleave_0"), val = bool(false)]; + tensor var_18345_cast_fp16 = concat(axis = var_18344, interleave = var_18345_interleave_0, values = (reduce_max_0_cast_fp16, reduce_max_1_cast_fp16, reduce_max_2_cast_fp16, reduce_max_3_cast_fp16, reduce_max_4_cast_fp16, reduce_max_5_cast_fp16, reduce_max_6_cast_fp16, reduce_max_7_cast_fp16, reduce_max_8_cast_fp16, reduce_max_9_cast_fp16, reduce_max_10_cast_fp16, reduce_max_11_cast_fp16, reduce_max_12_cast_fp16, reduce_max_13_cast_fp16, reduce_max_14_cast_fp16, reduce_max_15_cast_fp16))[name = string("op_18345_cast_fp16")]; + tensor var_18347_axes_0 = const()[name = string("op_18347_axes_0"), val = tensor([0])]; + tensor var_18347_cast_fp16 = squeeze(axes = var_18347_axes_0, x = var_18345_cast_fp16)[name = string("op_18347_cast_fp16")]; + tensor var_18349_axes_0 = const()[name = string("op_18349_axes_0"), val = tensor([0])]; + tensor argmax_val = squeeze(axes = var_18349_axes_0, x = var_18347_cast_fp16)[name = string("op_18349_cast_fp16")]; + tensor argmax_idx = cast(dtype = var_18342_cast_uint16_to_int32_dtype_0, x = var_18342_cast_uint16)[name = string("cast_0")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (argmax_idx, argmax_val); + func prefill(tensor causal_mask, tensor current_pos, tensor input_ids, state> model_model_kv_cache_global, state> model_model_kv_cache_local, tensor position_ids) { + tensor model_model_embed_tokens_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301990016))))[name = string("model_model_embed_tokens_weight_palettized")]; + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335544512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336429312))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336462144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336683392))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336691648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336912896))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336921152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337805952))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337838784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338060032))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338068288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338289536))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338297792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339182592))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339215424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339436672))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339444928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339666176))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339674432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340559232))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340592064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340813312))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340821568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341042816))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341051072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341935872))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341968704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342189952))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342198208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342419456))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342427712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343312512))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343345344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343566592))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343574848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343796096))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343804352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344689152))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344721984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344943232))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344951488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345172736))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345180992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346065792))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346098624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346319872))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346328128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346549376))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346557632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347442432))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347475264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347696512))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347926016))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347934272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348819072))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348851904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349073152))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349081408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349302656))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349310912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350195712))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350228544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350449792))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350458048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350679296))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350687552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351572352))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351605184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351826432))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351834688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352055936))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352064192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352948992))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352981824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353203072))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353211328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353432576))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353440832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354325632))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354358464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354579712))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354587968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354809216))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354817472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355702272))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355735104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355956352))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355964608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356185856))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356194112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357078912))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357111744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357332992))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357341248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357562496))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357570752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358455552))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358488384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358709632))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358717888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358939136))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358947392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359832192))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359865024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360086272))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360094528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360315776))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360324032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361208832))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361241664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361462912))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361471168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361692416))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361700672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362585472))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362618304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362839552))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362847808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363069056))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363077312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363962112))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363994944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364216192))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364224448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364445696))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364453952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365338752))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365371584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365592832))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365601088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365822336))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365830592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366715392))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366748224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366969472))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366977728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367198976))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367207232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368092032))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368124864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368346112))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368354368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368575616))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368583872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369468672))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369501504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369722752))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369731008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369952256))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369960512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370845312))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370878144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371099392))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371107648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371328896))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; + int32 hidden_states_1_batch_dims_0 = const()[name = string("hidden_states_1_batch_dims_0"), val = int32(0)]; + bool hidden_states_1_validate_indices_0 = const()[name = string("hidden_states_1_validate_indices_0"), val = bool(false)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_208 = const()[name = string("slice_by_index_208"), val = int32(262144)]; + tensor add_0 = add(x = input_ids, y = slice_by_index_208)[name = string("add_0")]; + tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; + tensor greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(262144)]; + tensor add_0_1 = add(x = select_0, y = slice_by_index_0)[name = string("add_0_1")]; + tensor select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; + int32 hidden_states_1_axis_0 = const()[name = string("hidden_states_1_axis_0"), val = int32(0)]; + tensor hidden_states_1 = gather(axis = hidden_states_1_axis_0, batch_dims = hidden_states_1_batch_dims_0, indices = select_0_1, validate_indices = hidden_states_1_validate_indices_0, x = model_model_embed_tokens_weight_palettized)[name = string("hidden_states_1")]; + fp16 var_1659_to_fp16 = const()[name = string("op_1659_to_fp16"), val = fp16(0x1.0f8p+5)]; + tensor hidden_states_3_cast_fp16 = mul(x = hidden_states_1, y = var_1659_to_fp16)[name = string("hidden_states_3_cast_fp16")]; + int32 var_1679_axis_0 = const()[name = string("op_1679_axis_0"), val = int32(1)]; + int32 var_1679_batch_dims_0 = const()[name = string("op_1679_batch_dims_0"), val = int32(0)]; + bool var_1679_validate_indices_0 = const()[name = string("op_1679_validate_indices_0"), val = bool(false)]; + tensor var_1671_to_fp16 = const()[name = string("op_1671_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375531520)))]; + string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")]; + tensor position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_0")]; + tensor var_1679_cast_fp16_cast_uint16 = gather(axis = var_1679_axis_0, batch_dims = var_1679_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_1679_validate_indices_0, x = var_1671_to_fp16)[name = string("op_1679_cast_fp16_cast_uint16")]; + tensor var_1683 = const()[name = string("op_1683"), val = tensor([1, 64, 1, 256])]; + tensor cos_1_cast_fp16 = reshape(shape = var_1683, x = var_1679_cast_fp16_cast_uint16)[name = string("cos_1_cast_fp16")]; + int32 var_1693_axis_0 = const()[name = string("op_1693_axis_0"), val = int32(1)]; + int32 var_1693_batch_dims_0 = const()[name = string("op_1693_batch_dims_0"), val = int32(0)]; + bool var_1693_validate_indices_0 = const()[name = string("op_1693_validate_indices_0"), val = bool(false)]; + tensor var_1685_to_fp16 = const()[name = string("op_1685_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371337152)))]; + tensor var_1693_cast_fp16_cast_uint16 = gather(axis = var_1693_axis_0, batch_dims = var_1693_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_1693_validate_indices_0, x = var_1685_to_fp16)[name = string("op_1693_cast_fp16_cast_uint16")]; + tensor var_1697 = const()[name = string("op_1697"), val = tensor([1, 64, 1, 256])]; + tensor sin_1_cast_fp16 = reshape(shape = var_1697, x = var_1693_cast_fp16_cast_uint16)[name = string("sin_1_cast_fp16")]; + int32 var_1718 = const()[name = string("op_1718"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_1720 = mul(x = hidden_states_3_cast_fp16, y = const_1_promoted)[name = string("op_1720")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1 = concat(axis = var_1718, interleave = input_1_interleave_0, values = (hidden_states_3_cast_fp16, var_1720))[name = string("input_1")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_1715_to_fp16 = const()[name = string("op_1715_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1715_to_fp16, x = input_1)[name = string("normed_1_cast_fp16")]; + tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_3 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3")]; + tensor var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379725888)))]; + tensor hidden_states_7_cast_fp16 = mul(x = normed_3, y = var_1734_to_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_1745 = const()[name = string("op_1745"), val = tensor([0, 2, 1])]; + tensor var_1748_axes_0 = const()[name = string("op_1748_axes_0"), val = tensor([2])]; + tensor var_1746_cast_fp16 = transpose(perm = var_1745, x = hidden_states_7_cast_fp16)[name = string("transpose_237")]; + tensor var_1748_cast_fp16 = expand_dims(axes = var_1748_axes_0, x = var_1746_cast_fp16)[name = string("op_1748_cast_fp16")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1748_cast_fp16)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1748_cast_fp16)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1748_cast_fp16)[name = string("value_states_1")]; + tensor var_1790 = const()[name = string("op_1790"), val = tensor([1, 4, 256, 64])]; + tensor var_1791 = reshape(shape = var_1790, x = query_states_1)[name = string("op_1791")]; + tensor var_1796 = const()[name = string("op_1796"), val = tensor([0, 1, 3, 2])]; + tensor var_1801 = const()[name = string("op_1801"), val = tensor([1, 1, 256, 64])]; + tensor var_1802 = reshape(shape = var_1801, x = key_states_1)[name = string("op_1802")]; + tensor var_1807 = const()[name = string("op_1807"), val = tensor([0, 1, 3, 2])]; + tensor var_1812 = const()[name = string("op_1812"), val = tensor([1, 1, 256, 64])]; + tensor var_1813 = reshape(shape = var_1812, x = value_states_1)[name = string("op_1813")]; + tensor var_1818 = const()[name = string("op_1818"), val = tensor([0, 1, 3, 2])]; + int32 var_1829 = const()[name = string("op_1829"), val = int32(-1)]; + fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_9 = transpose(perm = var_1796, x = var_1791)[name = string("transpose_236")]; + tensor var_1831 = mul(x = hidden_states_9, y = const_6_promoted)[name = string("op_1831")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_1829, interleave = input_5_interleave_0, values = (hidden_states_9, var_1831))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1826_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; + tensor var_1845_to_fp16 = const()[name = string("op_1845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379728256)))]; + tensor q_1_cast_fp16 = mul(x = normed_7, y = var_1845_to_fp16)[name = string("q_1_cast_fp16")]; + int32 var_1856 = const()[name = string("op_1856"), val = int32(-1)]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_11 = transpose(perm = var_1807, x = var_1802)[name = string("transpose_235")]; + tensor var_1858 = mul(x = hidden_states_11, y = const_10_promoted)[name = string("op_1858")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_1856, interleave = input_7_interleave_0, values = (hidden_states_11, var_1858))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_1853_to_fp16 = const()[name = string("op_1853_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1853_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; + tensor var_1872_to_fp16 = const()[name = string("op_1872_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379728832)))]; + tensor k_1_cast_fp16 = mul(x = normed_11, y = var_1872_to_fp16)[name = string("k_1_cast_fp16")]; + tensor var_1878 = const()[name = string("op_1878"), val = tensor([0, 2, 1, 3])]; + tensor var_1884 = const()[name = string("op_1884"), val = tensor([0, 2, 1, 3])]; + tensor cos_5 = transpose(perm = var_1878, x = cos_1_cast_fp16)[name = string("transpose_234")]; + tensor var_1886_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_5)[name = string("op_1886_cast_fp16")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1907_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1907_cast_fp16")]; + int32 var_1909 = const()[name = string("op_1909"), val = int32(-1)]; + bool var_1910_interleave_0 = const()[name = string("op_1910_interleave_0"), val = bool(false)]; + tensor var_1910_cast_fp16 = concat(axis = var_1909, interleave = var_1910_interleave_0, values = (var_1907_cast_fp16, x1_1_cast_fp16))[name = string("op_1910_cast_fp16")]; + tensor sin_5 = transpose(perm = var_1884, x = sin_1_cast_fp16)[name = string("transpose_233")]; + tensor var_1911_cast_fp16 = mul(x = var_1910_cast_fp16, y = sin_5)[name = string("op_1911_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_1886_cast_fp16, y = var_1911_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor var_1914_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_5)[name = string("op_1914_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1935_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1935_cast_fp16")]; + int32 var_1937 = const()[name = string("op_1937"), val = int32(-1)]; + bool var_1938_interleave_0 = const()[name = string("op_1938_interleave_0"), val = bool(false)]; + tensor var_1938_cast_fp16 = concat(axis = var_1937, interleave = var_1938_interleave_0, values = (var_1935_cast_fp16, x1_3_cast_fp16))[name = string("op_1938_cast_fp16")]; + tensor var_1939_cast_fp16 = mul(x = var_1938_cast_fp16, y = sin_5)[name = string("op_1939_cast_fp16")]; + tensor key_states_3_cast_fp16 = add(x = var_1914_cast_fp16, y = var_1939_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor seq_len_5 = const()[name = string("seq_len_5"), val = tensor([64])]; + tensor end_pos_1 = add(x = current_pos, y = seq_len_5)[name = string("end_pos_1")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_local)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, end_pos_1, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_104_write_state")]; + tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_104")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([22])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([23])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, end_pos_1, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3 = transpose(perm = var_1818, x = var_1813)[name = string("transpose_232")]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_52)[name = string("model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_105_write_state")]; + tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_105")]; + tensor var_2038_begin_0 = const()[name = string("op_2038_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2038_end_0 = const()[name = string("op_2038_end_0"), val = tensor([1, 1, 512, 256])]; + tensor var_2038_end_mask_0 = const()[name = string("op_2038_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = coreml_update_state_53)[name = string("op_2038_cast_fp16")]; + tensor var_2045_begin_0 = const()[name = string("op_2045_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_2045_end_0 = const()[name = string("op_2045_end_0"), val = tensor([23, 1, 512, 256])]; + tensor var_2045_end_mask_0 = const()[name = string("op_2045_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2045_cast_fp16 = slice_by_index(begin = var_2045_begin_0, end = var_2045_end_0, end_mask = var_2045_end_mask_0, x = coreml_update_state_53)[name = string("op_2045_cast_fp16")]; + tensor var_2084 = const()[name = string("op_2084"), val = tensor([1, 4, 1, 1])]; + tensor x_5_cast_fp16 = tile(reps = var_2084, x = var_2038_cast_fp16)[name = string("x_5_cast_fp16")]; + tensor var_2104 = const()[name = string("op_2104"), val = tensor([1, 4, 1, 1])]; + tensor x_11_cast_fp16 = tile(reps = var_2104, x = var_2045_cast_fp16)[name = string("x_11_cast_fp16")]; + bool var_2131_transpose_x_0 = const()[name = string("op_2131_transpose_x_0"), val = bool(false)]; + bool var_2131_transpose_y_0 = const()[name = string("op_2131_transpose_y_0"), val = bool(true)]; + tensor var_2131 = matmul(transpose_x = var_2131_transpose_x_0, transpose_y = var_2131_transpose_y_0, x = query_states_3_cast_fp16, y = x_5_cast_fp16)[name = string("op_2131")]; + fp16 var_2132_to_fp16 = const()[name = string("op_2132_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_2131, y = var_2132_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor mask_slice_1_begin_0 = const()[name = string("mask_slice_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor mask_slice_1_end_0 = const()[name = string("mask_slice_1_end_0"), val = tensor([1, 1, 64, 512])]; + tensor mask_slice_1_end_mask_0 = const()[name = string("mask_slice_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor mask_slice_1 = slice_by_index(begin = mask_slice_1_begin_0, end = mask_slice_1_end_0, end_mask = mask_slice_1_end_mask_0, x = causal_mask)[name = string("mask_slice_1")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask_slice_1)[name = string("attn_weights_3_cast_fp16")]; + int32 var_2167 = const()[name = string("op_2167"), val = int32(-1)]; + tensor var_2169_cast_fp16 = softmax(axis = var_2167, x = attn_weights_3_cast_fp16)[name = string("op_2169_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([4, 64, 512])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_2169_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([4, 512, 256])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_11_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 4, 64, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_2181_perm_0 = const()[name = string("op_2181_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2200 = const()[name = string("op_2200"), val = tensor([1, 64, 1024])]; + tensor var_2181_cast_fp16 = transpose(perm = var_2181_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_231")]; + tensor attn_output_5_cast_fp16 = reshape(shape = var_2200, x = var_2181_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_2205 = const()[name = string("op_2205"), val = tensor([0, 2, 1])]; + string var_2221_pad_type_0 = const()[name = string("op_2221_pad_type_0"), val = string("valid")]; + int32 var_2221_groups_0 = const()[name = string("op_2221_groups_0"), val = int32(1)]; + tensor var_2221_strides_0 = const()[name = string("op_2221_strides_0"), val = tensor([1])]; + tensor var_2221_pad_0 = const()[name = string("op_2221_pad_0"), val = tensor([0, 0])]; + tensor var_2221_dilations_0 = const()[name = string("op_2221_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379729408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380614208))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2206_cast_fp16 = transpose(perm = var_2205, x = attn_output_5_cast_fp16)[name = string("transpose_230")]; + tensor var_2221_cast_fp16 = conv(dilations = var_2221_dilations_0, groups = var_2221_groups_0, pad = var_2221_pad_0, pad_type = var_2221_pad_type_0, strides = var_2221_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_2206_cast_fp16)[name = string("op_2221_cast_fp16")]; + tensor var_2225 = const()[name = string("op_2225"), val = tensor([0, 2, 1])]; + int32 var_2236 = const()[name = string("op_2236"), val = int32(-1)]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_13_cast_fp16 = transpose(perm = var_2225, x = var_2221_cast_fp16)[name = string("transpose_229")]; + tensor var_2238_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2238_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_2236, interleave = input_11_interleave_0, values = (hidden_states_13_cast_fp16, var_2238_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_2233_to_fp16 = const()[name = string("op_2233_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2233_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; + tensor var_2252_to_fp16 = const()[name = string("op_2252_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380651136)))]; + tensor attn_output_9_cast_fp16 = mul(x = normed_15_cast_fp16, y = var_2252_to_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = attn_output_9_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + int32 var_2265 = const()[name = string("op_2265"), val = int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2267_cast_fp16 = mul(x = hidden_states_15_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2267_cast_fp16")]; + bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; + tensor input_13_cast_fp16 = concat(axis = var_2265, interleave = input_13_interleave_0, values = (hidden_states_15_cast_fp16, var_2267_cast_fp16))[name = string("input_13_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_2262_to_fp16 = const()[name = string("op_2262_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2262_to_fp16, x = input_13_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; + tensor var_2281_to_fp16 = const()[name = string("op_2281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380653504)))]; + tensor x_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = var_2281_to_fp16)[name = string("x_13_cast_fp16")]; + tensor var_2293 = const()[name = string("op_2293"), val = tensor([0, 2, 1])]; + tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; + tensor var_2294_cast_fp16 = transpose(perm = var_2293, x = x_13_cast_fp16)[name = string("transpose_228")]; + tensor input_15_cast_fp16 = expand_dims(axes = input_15_axes_0, x = var_2294_cast_fp16)[name = string("input_15_cast_fp16")]; + string x_15_pad_type_0 = const()[name = string("x_15_pad_type_0"), val = string("valid")]; + tensor x_15_strides_0 = const()[name = string("x_15_strides_0"), val = tensor([1, 1])]; + tensor x_15_pad_0 = const()[name = string("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_15_dilations_0 = const()[name = string("x_15_dilations_0"), val = tensor([1, 1])]; + int32 x_15_groups_0 = const()[name = string("x_15_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380655872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386627904))))[name = string("model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_15_cast_fp16 = conv(dilations = x_15_dilations_0, groups = x_15_groups_0, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = x_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("x_15_cast_fp16")]; + string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; + tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; + tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; + int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386849152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392821184))))[name = string("model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_1_cast_fp16 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("b_1_cast_fp16")]; + string var_2319_mode_0 = const()[name = string("op_2319_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_2319_cast_fp16 = gelu(mode = var_2319_mode_0, x = x_15_cast_fp16)[name = string("op_2319_cast_fp16")]; + tensor input_17_cast_fp16 = mul(x = var_2319_cast_fp16, y = b_1_cast_fp16)[name = string("input_17_cast_fp16")]; + string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; + tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; + tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; + int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393042432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399014464))))[name = string("model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_1_cast_fp16 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_17_cast_fp16)[name = string("e_1_cast_fp16")]; + tensor var_2327_axes_0 = const()[name = string("op_2327_axes_0"), val = tensor([2])]; + tensor var_2327_cast_fp16 = squeeze(axes = var_2327_axes_0, x = e_1_cast_fp16)[name = string("op_2327_cast_fp16")]; + tensor var_2328 = const()[name = string("op_2328"), val = tensor([0, 2, 1])]; + int32 var_2339 = const()[name = string("op_2339"), val = int32(-1)]; + fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_17_cast_fp16 = transpose(perm = var_2328, x = var_2327_cast_fp16)[name = string("transpose_227")]; + tensor var_2341_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_2341_cast_fp16")]; + bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; + tensor input_19_cast_fp16 = concat(axis = var_2339, interleave = input_19_interleave_0, values = (hidden_states_17_cast_fp16, var_2341_cast_fp16))[name = string("input_19_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_2336_to_fp16 = const()[name = string("op_2336_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2336_to_fp16, x = input_19_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_23_cast_fp16 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23_cast_fp16")]; + tensor var_2355_to_fp16 = const()[name = string("op_2355_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399051392)))]; + tensor hidden_states_19_cast_fp16 = mul(x = normed_23_cast_fp16, y = var_2355_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + int32 var_2409 = const()[name = string("op_2409"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2411_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2411_cast_fp16")]; + bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; + tensor input_21_cast_fp16 = concat(axis = var_2409, interleave = input_21_interleave_0, values = (hidden_states_21_cast_fp16, var_2411_cast_fp16))[name = string("input_21_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_2406_to_fp16 = const()[name = string("op_2406_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2406_to_fp16, x = input_21_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_27_cast_fp16 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27_cast_fp16")]; + tensor var_2425_to_fp16 = const()[name = string("op_2425_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399053760)))]; + tensor hidden_states_23_cast_fp16 = mul(x = normed_27_cast_fp16, y = var_2425_to_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor var_2436 = const()[name = string("op_2436"), val = tensor([0, 2, 1])]; + tensor var_2439_axes_0 = const()[name = string("op_2439_axes_0"), val = tensor([2])]; + tensor var_2437_cast_fp16 = transpose(perm = var_2436, x = hidden_states_23_cast_fp16)[name = string("transpose_226")]; + tensor var_2439_cast_fp16 = expand_dims(axes = var_2439_axes_0, x = var_2437_cast_fp16)[name = string("op_2439_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2439_cast_fp16)[name = string("query_states_9")]; + string key_states_11_pad_type_0 = const()[name = string("key_states_11_pad_type_0"), val = string("valid")]; + tensor key_states_11_strides_0 = const()[name = string("key_states_11_strides_0"), val = tensor([1, 1])]; + tensor key_states_11_pad_0 = const()[name = string("key_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_11_dilations_0 = const()[name = string("key_states_11_dilations_0"), val = tensor([1, 1])]; + int32 key_states_11_groups_0 = const()[name = string("key_states_11_groups_0"), val = int32(1)]; + tensor key_states_11 = conv(dilations = key_states_11_dilations_0, groups = key_states_11_groups_0, pad = key_states_11_pad_0, pad_type = key_states_11_pad_type_0, strides = key_states_11_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2439_cast_fp16)[name = string("key_states_11")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; + tensor value_states_9 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2439_cast_fp16)[name = string("value_states_9")]; + tensor var_2481 = const()[name = string("op_2481"), val = tensor([1, 4, 256, 64])]; + tensor var_2482 = reshape(shape = var_2481, x = query_states_9)[name = string("op_2482")]; + tensor var_2487 = const()[name = string("op_2487"), val = tensor([0, 1, 3, 2])]; + tensor var_2492 = const()[name = string("op_2492"), val = tensor([1, 1, 256, 64])]; + tensor var_2493 = reshape(shape = var_2492, x = key_states_11)[name = string("op_2493")]; + tensor var_2498 = const()[name = string("op_2498"), val = tensor([0, 1, 3, 2])]; + tensor var_2503 = const()[name = string("op_2503"), val = tensor([1, 1, 256, 64])]; + tensor var_2504 = reshape(shape = var_2503, x = value_states_9)[name = string("op_2504")]; + tensor var_2509 = const()[name = string("op_2509"), val = tensor([0, 1, 3, 2])]; + int32 var_2520 = const()[name = string("op_2520"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_25 = transpose(perm = var_2487, x = var_2482)[name = string("transpose_225")]; + tensor var_2522 = mul(x = hidden_states_25, y = const_49_promoted)[name = string("op_2522")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25 = concat(axis = var_2520, interleave = input_25_interleave_0, values = (hidden_states_25, var_2522))[name = string("input_25")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_2517_to_fp16 = const()[name = string("op_2517_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2517_to_fp16, x = input_25)[name = string("normed_29_cast_fp16")]; + tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_31 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31")]; + tensor var_2536_to_fp16 = const()[name = string("op_2536_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399056128)))]; + tensor q_3_cast_fp16 = mul(x = normed_31, y = var_2536_to_fp16)[name = string("q_3_cast_fp16")]; + int32 var_2547 = const()[name = string("op_2547"), val = int32(-1)]; + fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_27 = transpose(perm = var_2498, x = var_2493)[name = string("transpose_224")]; + tensor var_2549 = mul(x = hidden_states_27, y = const_53_promoted)[name = string("op_2549")]; + bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; + tensor input_27 = concat(axis = var_2547, interleave = input_27_interleave_0, values = (hidden_states_27, var_2549))[name = string("input_27")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_2544_to_fp16 = const()[name = string("op_2544_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2544_to_fp16, x = input_27)[name = string("normed_33_cast_fp16")]; + tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_35 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35")]; + tensor var_2563_to_fp16 = const()[name = string("op_2563_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399056704)))]; + tensor k_3_cast_fp16 = mul(x = normed_35, y = var_2563_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_2577_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_5)[name = string("op_2577_cast_fp16")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2598_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_2598_cast_fp16")]; + int32 var_2600 = const()[name = string("op_2600"), val = int32(-1)]; + bool var_2601_interleave_0 = const()[name = string("op_2601_interleave_0"), val = bool(false)]; + tensor var_2601_cast_fp16 = concat(axis = var_2600, interleave = var_2601_interleave_0, values = (var_2598_cast_fp16, x1_5_cast_fp16))[name = string("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = mul(x = var_2601_cast_fp16, y = sin_5)[name = string("op_2602_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_2577_cast_fp16, y = var_2602_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor var_2605_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_5)[name = string("op_2605_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; + fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2626_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_2626_cast_fp16")]; + int32 var_2628 = const()[name = string("op_2628"), val = int32(-1)]; + bool var_2629_interleave_0 = const()[name = string("op_2629_interleave_0"), val = bool(false)]; + tensor var_2629_cast_fp16 = concat(axis = var_2628, interleave = var_2629_interleave_0, values = (var_2626_cast_fp16, x1_7_cast_fp16))[name = string("op_2629_cast_fp16")]; + tensor var_2630_cast_fp16 = mul(x = var_2629_cast_fp16, y = sin_5)[name = string("op_2630_cast_fp16")]; + tensor key_states_13_cast_fp16 = add(x = var_2605_cast_fp16, y = var_2630_cast_fp16)[name = string("key_states_13_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, end_pos_1, concat_21_values3_0))[name = string("concat_21")]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_3_stride_0, update = key_states_13_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_106_write_state")]; + tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_106")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([23])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([24])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; + tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; + tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; + int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; + bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; + tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, end_pos_1, concat_25_values3_0))[name = string("concat_25")]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_11 = transpose(perm = var_2509, x = var_2504)[name = string("transpose_223")]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_4_stride_0, update = value_states_11, x = coreml_update_state_54)[name = string("model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_107_write_state")]; + tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_107")]; + tensor var_2729_begin_0 = const()[name = string("op_2729_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_2729_end_0 = const()[name = string("op_2729_end_0"), val = tensor([2, 1, 512, 256])]; + tensor var_2729_end_mask_0 = const()[name = string("op_2729_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2729_cast_fp16 = slice_by_index(begin = var_2729_begin_0, end = var_2729_end_0, end_mask = var_2729_end_mask_0, x = coreml_update_state_55)[name = string("op_2729_cast_fp16")]; + tensor var_2736_begin_0 = const()[name = string("op_2736_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_2736_end_0 = const()[name = string("op_2736_end_0"), val = tensor([24, 1, 512, 256])]; + tensor var_2736_end_mask_0 = const()[name = string("op_2736_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2736_cast_fp16 = slice_by_index(begin = var_2736_begin_0, end = var_2736_end_0, end_mask = var_2736_end_mask_0, x = coreml_update_state_55)[name = string("op_2736_cast_fp16")]; + tensor var_2775 = const()[name = string("op_2775"), val = tensor([1, 4, 1, 1])]; + tensor x_21_cast_fp16 = tile(reps = var_2775, x = var_2729_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor var_2795 = const()[name = string("op_2795"), val = tensor([1, 4, 1, 1])]; + tensor x_27_cast_fp16 = tile(reps = var_2795, x = var_2736_cast_fp16)[name = string("x_27_cast_fp16")]; + bool var_2822_transpose_x_0 = const()[name = string("op_2822_transpose_x_0"), val = bool(false)]; + bool var_2822_transpose_y_0 = const()[name = string("op_2822_transpose_y_0"), val = bool(true)]; + tensor var_2822 = matmul(transpose_x = var_2822_transpose_x_0, transpose_y = var_2822_transpose_y_0, x = query_states_11_cast_fp16, y = x_21_cast_fp16)[name = string("op_2822")]; + fp16 var_2823_to_fp16 = const()[name = string("op_2823_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_2822, y = var_2823_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask_slice_1)[name = string("attn_weights_7_cast_fp16")]; + int32 var_2858 = const()[name = string("op_2858"), val = int32(-1)]; + tensor var_2860_cast_fp16 = softmax(axis = var_2858, x = attn_weights_7_cast_fp16)[name = string("op_2860_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([4, 64, 512])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_2860_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([4, 512, 256])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_27_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 4, 64, 256])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_2872_perm_0 = const()[name = string("op_2872_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2891 = const()[name = string("op_2891"), val = tensor([1, 64, 1024])]; + tensor var_2872_cast_fp16 = transpose(perm = var_2872_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_222")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2891, x = var_2872_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2896 = const()[name = string("op_2896"), val = tensor([0, 2, 1])]; + string var_2912_pad_type_0 = const()[name = string("op_2912_pad_type_0"), val = string("valid")]; + int32 var_2912_groups_0 = const()[name = string("op_2912_groups_0"), val = int32(1)]; + tensor var_2912_strides_0 = const()[name = string("op_2912_strides_0"), val = tensor([1])]; + tensor var_2912_pad_0 = const()[name = string("op_2912_pad_0"), val = tensor([0, 0])]; + tensor var_2912_dilations_0 = const()[name = string("op_2912_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399057280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399942080))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2897_cast_fp16 = transpose(perm = var_2896, x = attn_output_15_cast_fp16)[name = string("transpose_221")]; + tensor var_2912_cast_fp16 = conv(dilations = var_2912_dilations_0, groups = var_2912_groups_0, pad = var_2912_pad_0, pad_type = var_2912_pad_type_0, strides = var_2912_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2897_cast_fp16)[name = string("op_2912_cast_fp16")]; + tensor var_2916 = const()[name = string("op_2916"), val = tensor([0, 2, 1])]; + int32 var_2927 = const()[name = string("op_2927"), val = int32(-1)]; + fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_29_cast_fp16 = transpose(perm = var_2916, x = var_2912_cast_fp16)[name = string("transpose_220")]; + tensor var_2929_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_2929_cast_fp16")]; + bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; + tensor input_31_cast_fp16 = concat(axis = var_2927, interleave = input_31_interleave_0, values = (hidden_states_29_cast_fp16, var_2929_cast_fp16))[name = string("input_31_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_2924_to_fp16 = const()[name = string("op_2924_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2924_to_fp16, x = input_31_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_39_cast_fp16 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39_cast_fp16")]; + tensor var_2943_to_fp16 = const()[name = string("op_2943_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399979008)))]; + tensor attn_output_19_cast_fp16 = mul(x = normed_39_cast_fp16, y = var_2943_to_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + int32 var_2956 = const()[name = string("op_2956"), val = int32(-1)]; + fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2958_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_2958_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_2956, interleave = input_33_interleave_0, values = (hidden_states_31_cast_fp16, var_2958_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2953_to_fp16, x = input_33_cast_fp16)[name = string("normed_41_cast_fp16")]; + tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_43_cast_fp16 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43_cast_fp16")]; + tensor var_2972_to_fp16 = const()[name = string("op_2972_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399981376)))]; + tensor x_29_cast_fp16 = mul(x = normed_43_cast_fp16, y = var_2972_to_fp16)[name = string("x_29_cast_fp16")]; + tensor var_2984 = const()[name = string("op_2984"), val = tensor([0, 2, 1])]; + tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; + tensor var_2985_cast_fp16 = transpose(perm = var_2984, x = x_29_cast_fp16)[name = string("transpose_219")]; + tensor input_35_cast_fp16 = expand_dims(axes = input_35_axes_0, x = var_2985_cast_fp16)[name = string("input_35_cast_fp16")]; + string x_31_pad_type_0 = const()[name = string("x_31_pad_type_0"), val = string("valid")]; + tensor x_31_strides_0 = const()[name = string("x_31_strides_0"), val = tensor([1, 1])]; + tensor x_31_pad_0 = const()[name = string("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_31_dilations_0 = const()[name = string("x_31_dilations_0"), val = tensor([1, 1])]; + int32 x_31_groups_0 = const()[name = string("x_31_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399983744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405955776))))[name = string("model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("x_31_cast_fp16")]; + string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; + tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; + tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; + int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406177024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412149056))))[name = string("model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_3_cast_fp16 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("b_3_cast_fp16")]; + string var_3010_mode_0 = const()[name = string("op_3010_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3010_cast_fp16 = gelu(mode = var_3010_mode_0, x = x_31_cast_fp16)[name = string("op_3010_cast_fp16")]; + tensor input_37_cast_fp16 = mul(x = var_3010_cast_fp16, y = b_3_cast_fp16)[name = string("input_37_cast_fp16")]; + string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; + tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; + tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; + int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412370304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418342336))))[name = string("model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_3_cast_fp16 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_37_cast_fp16)[name = string("e_3_cast_fp16")]; + tensor var_3018_axes_0 = const()[name = string("op_3018_axes_0"), val = tensor([2])]; + tensor var_3018_cast_fp16 = squeeze(axes = var_3018_axes_0, x = e_3_cast_fp16)[name = string("op_3018_cast_fp16")]; + tensor var_3019 = const()[name = string("op_3019"), val = tensor([0, 2, 1])]; + int32 var_3030 = const()[name = string("op_3030"), val = int32(-1)]; + fp16 const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_33_cast_fp16 = transpose(perm = var_3019, x = var_3018_cast_fp16)[name = string("transpose_218")]; + tensor var_3032_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_82_promoted_to_fp16)[name = string("op_3032_cast_fp16")]; + bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; + tensor input_39_cast_fp16 = concat(axis = var_3030, interleave = input_39_interleave_0, values = (hidden_states_33_cast_fp16, var_3032_cast_fp16))[name = string("input_39_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_3027_to_fp16 = const()[name = string("op_3027_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_3027_to_fp16, x = input_39_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; + tensor var_3046_to_fp16 = const()[name = string("op_3046_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418379264)))]; + tensor hidden_states_35_cast_fp16 = mul(x = normed_47_cast_fp16, y = var_3046_to_fp16)[name = string("hidden_states_35_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + int32 var_3100 = const()[name = string("op_3100"), val = int32(-1)]; + fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3102_cast_fp16 = mul(x = hidden_states_37_cast_fp16, y = const_87_promoted_to_fp16)[name = string("op_3102_cast_fp16")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41_cast_fp16 = concat(axis = var_3100, interleave = input_41_interleave_0, values = (hidden_states_37_cast_fp16, var_3102_cast_fp16))[name = string("input_41_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_3097_to_fp16 = const()[name = string("op_3097_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_3097_to_fp16, x = input_41_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; + tensor var_3116_to_fp16 = const()[name = string("op_3116_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418381632)))]; + tensor hidden_states_39_cast_fp16 = mul(x = normed_51_cast_fp16, y = var_3116_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor var_3127 = const()[name = string("op_3127"), val = tensor([0, 2, 1])]; + tensor var_3130_axes_0 = const()[name = string("op_3130_axes_0"), val = tensor([2])]; + tensor var_3128_cast_fp16 = transpose(perm = var_3127, x = hidden_states_39_cast_fp16)[name = string("transpose_217")]; + tensor var_3130_cast_fp16 = expand_dims(axes = var_3130_axes_0, x = var_3128_cast_fp16)[name = string("op_3130_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_3130_cast_fp16)[name = string("query_states_17")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; + tensor key_states_21 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_3130_cast_fp16)[name = string("key_states_21")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor value_states_17 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_3130_cast_fp16)[name = string("value_states_17")]; + tensor var_3172 = const()[name = string("op_3172"), val = tensor([1, 4, 256, 64])]; + tensor var_3173 = reshape(shape = var_3172, x = query_states_17)[name = string("op_3173")]; + tensor var_3178 = const()[name = string("op_3178"), val = tensor([0, 1, 3, 2])]; + tensor var_3183 = const()[name = string("op_3183"), val = tensor([1, 1, 256, 64])]; + tensor var_3184 = reshape(shape = var_3183, x = key_states_21)[name = string("op_3184")]; + tensor var_3189 = const()[name = string("op_3189"), val = tensor([0, 1, 3, 2])]; + tensor var_3194 = const()[name = string("op_3194"), val = tensor([1, 1, 256, 64])]; + tensor var_3195 = reshape(shape = var_3194, x = value_states_17)[name = string("op_3195")]; + tensor var_3200 = const()[name = string("op_3200"), val = tensor([0, 1, 3, 2])]; + int32 var_3211 = const()[name = string("op_3211"), val = int32(-1)]; + fp16 const_92_promoted = const()[name = string("const_92_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_41 = transpose(perm = var_3178, x = var_3173)[name = string("transpose_216")]; + tensor var_3213 = mul(x = hidden_states_41, y = const_92_promoted)[name = string("op_3213")]; + bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; + tensor input_45 = concat(axis = var_3211, interleave = input_45_interleave_0, values = (hidden_states_41, var_3213))[name = string("input_45")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_3208_to_fp16 = const()[name = string("op_3208_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3208_to_fp16, x = input_45)[name = string("normed_53_cast_fp16")]; + tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; + tensor var_3227_to_fp16 = const()[name = string("op_3227_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418384000)))]; + tensor q_5_cast_fp16 = mul(x = normed_55, y = var_3227_to_fp16)[name = string("q_5_cast_fp16")]; + int32 var_3238 = const()[name = string("op_3238"), val = int32(-1)]; + fp16 const_96_promoted = const()[name = string("const_96_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_43 = transpose(perm = var_3189, x = var_3184)[name = string("transpose_215")]; + tensor var_3240 = mul(x = hidden_states_43, y = const_96_promoted)[name = string("op_3240")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47 = concat(axis = var_3238, interleave = input_47_interleave_0, values = (hidden_states_43, var_3240))[name = string("input_47")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_3235_to_fp16 = const()[name = string("op_3235_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3235_to_fp16, x = input_47)[name = string("normed_57_cast_fp16")]; + tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; + tensor var_3254_to_fp16 = const()[name = string("op_3254_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418384576)))]; + tensor k_5_cast_fp16 = mul(x = normed_59, y = var_3254_to_fp16)[name = string("k_5_cast_fp16")]; + tensor var_3268_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_5)[name = string("op_3268_cast_fp16")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; + fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3289_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_3289_cast_fp16")]; + int32 var_3291 = const()[name = string("op_3291"), val = int32(-1)]; + bool var_3292_interleave_0 = const()[name = string("op_3292_interleave_0"), val = bool(false)]; + tensor var_3292_cast_fp16 = concat(axis = var_3291, interleave = var_3292_interleave_0, values = (var_3289_cast_fp16, x1_9_cast_fp16))[name = string("op_3292_cast_fp16")]; + tensor var_3293_cast_fp16 = mul(x = var_3292_cast_fp16, y = sin_5)[name = string("op_3293_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_3268_cast_fp16, y = var_3293_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor var_3296_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_5)[name = string("op_3296_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; + fp16 const_105_promoted_to_fp16 = const()[name = string("const_105_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3317_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_105_promoted_to_fp16)[name = string("op_3317_cast_fp16")]; + int32 var_3319 = const()[name = string("op_3319"), val = int32(-1)]; + bool var_3320_interleave_0 = const()[name = string("op_3320_interleave_0"), val = bool(false)]; + tensor var_3320_cast_fp16 = concat(axis = var_3319, interleave = var_3320_interleave_0, values = (var_3317_cast_fp16, x1_11_cast_fp16))[name = string("op_3320_cast_fp16")]; + tensor var_3321_cast_fp16 = mul(x = var_3320_cast_fp16, y = sin_5)[name = string("op_3321_cast_fp16")]; + tensor key_states_23_cast_fp16 = add(x = var_3296_cast_fp16, y = var_3321_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, end_pos_1, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_5_stride_0, update = key_states_23_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_108_write_state")]; + tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_108")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([24])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([25])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, end_pos_1, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_19 = transpose(perm = var_3200, x = var_3195)[name = string("transpose_214")]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_6_stride_0, update = value_states_19, x = coreml_update_state_56)[name = string("model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_109_write_state")]; + tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_109")]; + tensor var_3420_begin_0 = const()[name = string("op_3420_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_3420_end_0 = const()[name = string("op_3420_end_0"), val = tensor([3, 1, 512, 256])]; + tensor var_3420_end_mask_0 = const()[name = string("op_3420_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3420_cast_fp16 = slice_by_index(begin = var_3420_begin_0, end = var_3420_end_0, end_mask = var_3420_end_mask_0, x = coreml_update_state_57)[name = string("op_3420_cast_fp16")]; + tensor var_3427_begin_0 = const()[name = string("op_3427_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor var_3427_end_0 = const()[name = string("op_3427_end_0"), val = tensor([25, 1, 512, 256])]; + tensor var_3427_end_mask_0 = const()[name = string("op_3427_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3427_cast_fp16 = slice_by_index(begin = var_3427_begin_0, end = var_3427_end_0, end_mask = var_3427_end_mask_0, x = coreml_update_state_57)[name = string("op_3427_cast_fp16")]; + tensor var_3466 = const()[name = string("op_3466"), val = tensor([1, 4, 1, 1])]; + tensor x_37_cast_fp16 = tile(reps = var_3466, x = var_3420_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_3486 = const()[name = string("op_3486"), val = tensor([1, 4, 1, 1])]; + tensor x_43_cast_fp16 = tile(reps = var_3486, x = var_3427_cast_fp16)[name = string("x_43_cast_fp16")]; + bool var_3513_transpose_x_0 = const()[name = string("op_3513_transpose_x_0"), val = bool(false)]; + bool var_3513_transpose_y_0 = const()[name = string("op_3513_transpose_y_0"), val = bool(true)]; + tensor var_3513 = matmul(transpose_x = var_3513_transpose_x_0, transpose_y = var_3513_transpose_y_0, x = query_states_19_cast_fp16, y = x_37_cast_fp16)[name = string("op_3513")]; + fp16 var_3514_to_fp16 = const()[name = string("op_3514_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_3513, y = var_3514_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask_slice_1)[name = string("attn_weights_11_cast_fp16")]; + int32 var_3549 = const()[name = string("op_3549"), val = int32(-1)]; + tensor var_3551_cast_fp16 = softmax(axis = var_3549, x = attn_weights_11_cast_fp16)[name = string("op_3551_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([4, 64, 512])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_3551_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([4, 512, 256])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_43_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 4, 64, 256])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_3563_perm_0 = const()[name = string("op_3563_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3582 = const()[name = string("op_3582"), val = tensor([1, 64, 1024])]; + tensor var_3563_cast_fp16 = transpose(perm = var_3563_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_213")]; + tensor attn_output_25_cast_fp16 = reshape(shape = var_3582, x = var_3563_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3587 = const()[name = string("op_3587"), val = tensor([0, 2, 1])]; + string var_3603_pad_type_0 = const()[name = string("op_3603_pad_type_0"), val = string("valid")]; + int32 var_3603_groups_0 = const()[name = string("op_3603_groups_0"), val = int32(1)]; + tensor var_3603_strides_0 = const()[name = string("op_3603_strides_0"), val = tensor([1])]; + tensor var_3603_pad_0 = const()[name = string("op_3603_pad_0"), val = tensor([0, 0])]; + tensor var_3603_dilations_0 = const()[name = string("op_3603_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418385152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419269952))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3588_cast_fp16 = transpose(perm = var_3587, x = attn_output_25_cast_fp16)[name = string("transpose_212")]; + tensor var_3603_cast_fp16 = conv(dilations = var_3603_dilations_0, groups = var_3603_groups_0, pad = var_3603_pad_0, pad_type = var_3603_pad_type_0, strides = var_3603_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3588_cast_fp16)[name = string("op_3603_cast_fp16")]; + tensor var_3607 = const()[name = string("op_3607"), val = tensor([0, 2, 1])]; + int32 var_3618 = const()[name = string("op_3618"), val = int32(-1)]; + fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_45_cast_fp16 = transpose(perm = var_3607, x = var_3603_cast_fp16)[name = string("transpose_211")]; + tensor var_3620_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_117_promoted_to_fp16)[name = string("op_3620_cast_fp16")]; + bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; + tensor input_51_cast_fp16 = concat(axis = var_3618, interleave = input_51_interleave_0, values = (hidden_states_45_cast_fp16, var_3620_cast_fp16))[name = string("input_51_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_3615_to_fp16 = const()[name = string("op_3615_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3615_to_fp16, x = input_51_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; + tensor var_3634_to_fp16 = const()[name = string("op_3634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419306880)))]; + tensor attn_output_29_cast_fp16 = mul(x = normed_63_cast_fp16, y = var_3634_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; + int32 var_3647 = const()[name = string("op_3647"), val = int32(-1)]; + fp16 const_121_promoted_to_fp16 = const()[name = string("const_121_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3649_cast_fp16 = mul(x = hidden_states_47_cast_fp16, y = const_121_promoted_to_fp16)[name = string("op_3649_cast_fp16")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53_cast_fp16 = concat(axis = var_3647, interleave = input_53_interleave_0, values = (hidden_states_47_cast_fp16, var_3649_cast_fp16))[name = string("input_53_cast_fp16")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_3644_to_fp16 = const()[name = string("op_3644_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3644_to_fp16, x = input_53_cast_fp16)[name = string("normed_65_cast_fp16")]; + tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; + tensor var_3663_to_fp16 = const()[name = string("op_3663_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419309248)))]; + tensor x_45_cast_fp16 = mul(x = normed_67_cast_fp16, y = var_3663_to_fp16)[name = string("x_45_cast_fp16")]; + tensor var_3675 = const()[name = string("op_3675"), val = tensor([0, 2, 1])]; + tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; + tensor var_3676_cast_fp16 = transpose(perm = var_3675, x = x_45_cast_fp16)[name = string("transpose_210")]; + tensor input_55_cast_fp16 = expand_dims(axes = input_55_axes_0, x = var_3676_cast_fp16)[name = string("input_55_cast_fp16")]; + string x_47_pad_type_0 = const()[name = string("x_47_pad_type_0"), val = string("valid")]; + tensor x_47_strides_0 = const()[name = string("x_47_strides_0"), val = tensor([1, 1])]; + tensor x_47_pad_0 = const()[name = string("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_47_dilations_0 = const()[name = string("x_47_dilations_0"), val = tensor([1, 1])]; + int32 x_47_groups_0 = const()[name = string("x_47_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419311616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425283648))))[name = string("model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("x_47_cast_fp16")]; + string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; + tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; + tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; + int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425504896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431476928))))[name = string("model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_5_cast_fp16 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("b_5_cast_fp16")]; + string var_3701_mode_0 = const()[name = string("op_3701_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3701_cast_fp16 = gelu(mode = var_3701_mode_0, x = x_47_cast_fp16)[name = string("op_3701_cast_fp16")]; + tensor input_57_cast_fp16 = mul(x = var_3701_cast_fp16, y = b_5_cast_fp16)[name = string("input_57_cast_fp16")]; + string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; + tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; + tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; + int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431698176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437670208))))[name = string("model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_5_cast_fp16 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("e_5_cast_fp16")]; + tensor var_3709_axes_0 = const()[name = string("op_3709_axes_0"), val = tensor([2])]; + tensor var_3709_cast_fp16 = squeeze(axes = var_3709_axes_0, x = e_5_cast_fp16)[name = string("op_3709_cast_fp16")]; + tensor var_3710 = const()[name = string("op_3710"), val = tensor([0, 2, 1])]; + int32 var_3721 = const()[name = string("op_3721"), val = int32(-1)]; + fp16 const_125_promoted_to_fp16 = const()[name = string("const_125_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_49_cast_fp16 = transpose(perm = var_3710, x = var_3709_cast_fp16)[name = string("transpose_209")]; + tensor var_3723_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_125_promoted_to_fp16)[name = string("op_3723_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_3721, interleave = input_59_interleave_0, values = (hidden_states_49_cast_fp16, var_3723_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_3718_to_fp16 = const()[name = string("op_3718_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3718_to_fp16, x = input_59_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_71_cast_fp16 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71_cast_fp16")]; + tensor var_3737_to_fp16 = const()[name = string("op_3737_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437707136)))]; + tensor hidden_states_51_cast_fp16 = mul(x = normed_71_cast_fp16, y = var_3737_to_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + int32 var_3791 = const()[name = string("op_3791"), val = int32(-1)]; + fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3793_cast_fp16 = mul(x = hidden_states_53_cast_fp16, y = const_130_promoted_to_fp16)[name = string("op_3793_cast_fp16")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61_cast_fp16 = concat(axis = var_3791, interleave = input_61_interleave_0, values = (hidden_states_53_cast_fp16, var_3793_cast_fp16))[name = string("input_61_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_3788_to_fp16 = const()[name = string("op_3788_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3788_to_fp16, x = input_61_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_75_cast_fp16 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75_cast_fp16")]; + tensor var_3807_to_fp16 = const()[name = string("op_3807_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437709504)))]; + tensor hidden_states_55_cast_fp16 = mul(x = normed_75_cast_fp16, y = var_3807_to_fp16)[name = string("hidden_states_55_cast_fp16")]; + tensor var_3818 = const()[name = string("op_3818"), val = tensor([0, 2, 1])]; + tensor var_3821_axes_0 = const()[name = string("op_3821_axes_0"), val = tensor([2])]; + tensor var_3819_cast_fp16 = transpose(perm = var_3818, x = hidden_states_55_cast_fp16)[name = string("transpose_208")]; + tensor var_3821_cast_fp16 = expand_dims(axes = var_3821_axes_0, x = var_3819_cast_fp16)[name = string("op_3821_cast_fp16")]; + string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; + tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; + tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; + int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; + tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3821_cast_fp16)[name = string("query_states_25")]; + string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; + tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; + tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; + int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; + tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3821_cast_fp16)[name = string("key_states_31")]; + string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; + tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; + tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; + int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; + tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3821_cast_fp16)[name = string("value_states_25")]; + tensor var_3863 = const()[name = string("op_3863"), val = tensor([1, 4, 256, 64])]; + tensor var_3864 = reshape(shape = var_3863, x = query_states_25)[name = string("op_3864")]; + tensor var_3869 = const()[name = string("op_3869"), val = tensor([0, 1, 3, 2])]; + tensor var_3874 = const()[name = string("op_3874"), val = tensor([1, 1, 256, 64])]; + tensor var_3875 = reshape(shape = var_3874, x = key_states_31)[name = string("op_3875")]; + tensor var_3880 = const()[name = string("op_3880"), val = tensor([0, 1, 3, 2])]; + tensor var_3885 = const()[name = string("op_3885"), val = tensor([1, 1, 256, 64])]; + tensor var_3886 = reshape(shape = var_3885, x = value_states_25)[name = string("op_3886")]; + tensor var_3891 = const()[name = string("op_3891"), val = tensor([0, 1, 3, 2])]; + int32 var_3902 = const()[name = string("op_3902"), val = int32(-1)]; + fp16 const_135_promoted = const()[name = string("const_135_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_57 = transpose(perm = var_3869, x = var_3864)[name = string("transpose_207")]; + tensor var_3904 = mul(x = hidden_states_57, y = const_135_promoted)[name = string("op_3904")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_3902, interleave = input_65_interleave_0, values = (hidden_states_57, var_3904))[name = string("input_65")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_3899_to_fp16 = const()[name = string("op_3899_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_3899_to_fp16, x = input_65)[name = string("normed_77_cast_fp16")]; + tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_79 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79")]; + tensor var_3918_to_fp16 = const()[name = string("op_3918_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437711872)))]; + tensor q_7_cast_fp16 = mul(x = normed_79, y = var_3918_to_fp16)[name = string("q_7_cast_fp16")]; + int32 var_3929 = const()[name = string("op_3929"), val = int32(-1)]; + fp16 const_139_promoted = const()[name = string("const_139_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_59 = transpose(perm = var_3880, x = var_3875)[name = string("transpose_206")]; + tensor var_3931 = mul(x = hidden_states_59, y = const_139_promoted)[name = string("op_3931")]; + bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; + tensor input_67 = concat(axis = var_3929, interleave = input_67_interleave_0, values = (hidden_states_59, var_3931))[name = string("input_67")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_3926_to_fp16 = const()[name = string("op_3926_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_3926_to_fp16, x = input_67)[name = string("normed_81_cast_fp16")]; + tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_83 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83")]; + tensor var_3945_to_fp16 = const()[name = string("op_3945_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437712448)))]; + tensor k_7_cast_fp16 = mul(x = normed_83, y = var_3945_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_3959_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_5)[name = string("op_3959_cast_fp16")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; + fp16 const_145_promoted_to_fp16 = const()[name = string("const_145_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3980_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_145_promoted_to_fp16)[name = string("op_3980_cast_fp16")]; + int32 var_3982 = const()[name = string("op_3982"), val = int32(-1)]; + bool var_3983_interleave_0 = const()[name = string("op_3983_interleave_0"), val = bool(false)]; + tensor var_3983_cast_fp16 = concat(axis = var_3982, interleave = var_3983_interleave_0, values = (var_3980_cast_fp16, x1_13_cast_fp16))[name = string("op_3983_cast_fp16")]; + tensor var_3984_cast_fp16 = mul(x = var_3983_cast_fp16, y = sin_5)[name = string("op_3984_cast_fp16")]; + tensor query_states_27_cast_fp16 = add(x = var_3959_cast_fp16, y = var_3984_cast_fp16)[name = string("query_states_27_cast_fp16")]; + tensor var_3987_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_5)[name = string("op_3987_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; + fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4008_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_148_promoted_to_fp16)[name = string("op_4008_cast_fp16")]; + int32 var_4010 = const()[name = string("op_4010"), val = int32(-1)]; + bool var_4011_interleave_0 = const()[name = string("op_4011_interleave_0"), val = bool(false)]; + tensor var_4011_cast_fp16 = concat(axis = var_4010, interleave = var_4011_interleave_0, values = (var_4008_cast_fp16, x1_15_cast_fp16))[name = string("op_4011_cast_fp16")]; + tensor var_4012_cast_fp16 = mul(x = var_4011_cast_fp16, y = sin_5)[name = string("op_4012_cast_fp16")]; + tensor key_states_33_cast_fp16 = add(x = var_3987_cast_fp16, y = var_4012_cast_fp16)[name = string("key_states_33_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, end_pos_1, concat_57_values3_0))[name = string("concat_57")]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_7_stride_0, update = key_states_33_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_110_write_state")]; + tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_110")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([25])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([26])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; + tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; + tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; + int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; + bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; + tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, end_pos_1, concat_61_values3_0))[name = string("concat_61")]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_27 = transpose(perm = var_3891, x = var_3886)[name = string("transpose_205")]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_8_stride_0, update = value_states_27, x = coreml_update_state_58)[name = string("model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_111_write_state")]; + tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_111")]; + tensor var_4111_begin_0 = const()[name = string("op_4111_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_4111_end_0 = const()[name = string("op_4111_end_0"), val = tensor([4, 1, 512, 256])]; + tensor var_4111_end_mask_0 = const()[name = string("op_4111_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4111_cast_fp16 = slice_by_index(begin = var_4111_begin_0, end = var_4111_end_0, end_mask = var_4111_end_mask_0, x = coreml_update_state_59)[name = string("op_4111_cast_fp16")]; + tensor var_4118_begin_0 = const()[name = string("op_4118_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor var_4118_end_0 = const()[name = string("op_4118_end_0"), val = tensor([26, 1, 512, 256])]; + tensor var_4118_end_mask_0 = const()[name = string("op_4118_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4118_cast_fp16 = slice_by_index(begin = var_4118_begin_0, end = var_4118_end_0, end_mask = var_4118_end_mask_0, x = coreml_update_state_59)[name = string("op_4118_cast_fp16")]; + tensor var_4157 = const()[name = string("op_4157"), val = tensor([1, 4, 1, 1])]; + tensor x_53_cast_fp16 = tile(reps = var_4157, x = var_4111_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_4177 = const()[name = string("op_4177"), val = tensor([1, 4, 1, 1])]; + tensor x_59_cast_fp16 = tile(reps = var_4177, x = var_4118_cast_fp16)[name = string("x_59_cast_fp16")]; + bool var_4204_transpose_x_0 = const()[name = string("op_4204_transpose_x_0"), val = bool(false)]; + bool var_4204_transpose_y_0 = const()[name = string("op_4204_transpose_y_0"), val = bool(true)]; + tensor var_4204 = matmul(transpose_x = var_4204_transpose_x_0, transpose_y = var_4204_transpose_y_0, x = query_states_27_cast_fp16, y = x_53_cast_fp16)[name = string("op_4204")]; + fp16 var_4205_to_fp16 = const()[name = string("op_4205_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_4204, y = var_4205_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = mask_slice_1)[name = string("attn_weights_15_cast_fp16")]; + int32 var_4240 = const()[name = string("op_4240"), val = int32(-1)]; + tensor var_4242_cast_fp16 = softmax(axis = var_4240, x = attn_weights_15_cast_fp16)[name = string("op_4242_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([4, 64, 512])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_4242_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([4, 512, 256])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_59_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 4, 64, 256])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_4254_perm_0 = const()[name = string("op_4254_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4273 = const()[name = string("op_4273"), val = tensor([1, 64, 1024])]; + tensor var_4254_cast_fp16 = transpose(perm = var_4254_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_204")]; + tensor attn_output_35_cast_fp16 = reshape(shape = var_4273, x = var_4254_cast_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor var_4278 = const()[name = string("op_4278"), val = tensor([0, 2, 1])]; + string var_4294_pad_type_0 = const()[name = string("op_4294_pad_type_0"), val = string("valid")]; + int32 var_4294_groups_0 = const()[name = string("op_4294_groups_0"), val = int32(1)]; + tensor var_4294_strides_0 = const()[name = string("op_4294_strides_0"), val = tensor([1])]; + tensor var_4294_pad_0 = const()[name = string("op_4294_pad_0"), val = tensor([0, 0])]; + tensor var_4294_dilations_0 = const()[name = string("op_4294_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437713024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438597824))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4279_cast_fp16 = transpose(perm = var_4278, x = attn_output_35_cast_fp16)[name = string("transpose_203")]; + tensor var_4294_cast_fp16 = conv(dilations = var_4294_dilations_0, groups = var_4294_groups_0, pad = var_4294_pad_0, pad_type = var_4294_pad_type_0, strides = var_4294_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_4279_cast_fp16)[name = string("op_4294_cast_fp16")]; + tensor var_4298 = const()[name = string("op_4298"), val = tensor([0, 2, 1])]; + int32 var_4309 = const()[name = string("op_4309"), val = int32(-1)]; + fp16 const_160_promoted_to_fp16 = const()[name = string("const_160_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_61_cast_fp16 = transpose(perm = var_4298, x = var_4294_cast_fp16)[name = string("transpose_202")]; + tensor var_4311_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_160_promoted_to_fp16)[name = string("op_4311_cast_fp16")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_4309, interleave = input_71_interleave_0, values = (hidden_states_61_cast_fp16, var_4311_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_4306_to_fp16 = const()[name = string("op_4306_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_4306_to_fp16, x = input_71_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_87_cast_fp16 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87_cast_fp16")]; + tensor var_4325_to_fp16 = const()[name = string("op_4325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438634752)))]; + tensor attn_output_39_cast_fp16 = mul(x = normed_87_cast_fp16, y = var_4325_to_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; + int32 var_4338 = const()[name = string("op_4338"), val = int32(-1)]; + fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4340_cast_fp16 = mul(x = hidden_states_63_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4340_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_4338, interleave = input_73_interleave_0, values = (hidden_states_63_cast_fp16, var_4340_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_4335_to_fp16 = const()[name = string("op_4335_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4335_to_fp16, x = input_73_cast_fp16)[name = string("normed_89_cast_fp16")]; + tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_91_cast_fp16 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91_cast_fp16")]; + tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438637120)))]; + tensor x_61_cast_fp16 = mul(x = normed_91_cast_fp16, y = var_4354_to_fp16)[name = string("x_61_cast_fp16")]; + tensor var_4366 = const()[name = string("op_4366"), val = tensor([0, 2, 1])]; + tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; + tensor var_4367_cast_fp16 = transpose(perm = var_4366, x = x_61_cast_fp16)[name = string("transpose_201")]; + tensor input_75_cast_fp16 = expand_dims(axes = input_75_axes_0, x = var_4367_cast_fp16)[name = string("input_75_cast_fp16")]; + string x_63_pad_type_0 = const()[name = string("x_63_pad_type_0"), val = string("valid")]; + tensor x_63_strides_0 = const()[name = string("x_63_strides_0"), val = tensor([1, 1])]; + tensor x_63_pad_0 = const()[name = string("x_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_63_dilations_0 = const()[name = string("x_63_dilations_0"), val = tensor([1, 1])]; + int32 x_63_groups_0 = const()[name = string("x_63_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438639488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444611520))))[name = string("model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_63_cast_fp16 = conv(dilations = x_63_dilations_0, groups = x_63_groups_0, pad = x_63_pad_0, pad_type = x_63_pad_type_0, strides = x_63_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("x_63_cast_fp16")]; + string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; + tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; + tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; + int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444832768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450804800))))[name = string("model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_7_cast_fp16 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("b_7_cast_fp16")]; + string var_4392_mode_0 = const()[name = string("op_4392_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4392_cast_fp16 = gelu(mode = var_4392_mode_0, x = x_63_cast_fp16)[name = string("op_4392_cast_fp16")]; + tensor input_77_cast_fp16 = mul(x = var_4392_cast_fp16, y = b_7_cast_fp16)[name = string("input_77_cast_fp16")]; + string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; + tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; + tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; + int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451026048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456998080))))[name = string("model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_7_cast_fp16 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_77_cast_fp16)[name = string("e_7_cast_fp16")]; + tensor var_4400_axes_0 = const()[name = string("op_4400_axes_0"), val = tensor([2])]; + tensor var_4400_cast_fp16 = squeeze(axes = var_4400_axes_0, x = e_7_cast_fp16)[name = string("op_4400_cast_fp16")]; + tensor var_4401 = const()[name = string("op_4401"), val = tensor([0, 2, 1])]; + int32 var_4412 = const()[name = string("op_4412"), val = int32(-1)]; + fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_65_cast_fp16 = transpose(perm = var_4401, x = var_4400_cast_fp16)[name = string("transpose_200")]; + tensor var_4414_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_168_promoted_to_fp16)[name = string("op_4414_cast_fp16")]; + bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; + tensor input_79_cast_fp16 = concat(axis = var_4412, interleave = input_79_interleave_0, values = (hidden_states_65_cast_fp16, var_4414_cast_fp16))[name = string("input_79_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_4409_to_fp16 = const()[name = string("op_4409_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4409_to_fp16, x = input_79_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; + tensor var_4428_to_fp16 = const()[name = string("op_4428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457035008)))]; + tensor hidden_states_67_cast_fp16 = mul(x = normed_95_cast_fp16, y = var_4428_to_fp16)[name = string("hidden_states_67_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_63_cast_fp16, y = hidden_states_67_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + int32 var_4482 = const()[name = string("op_4482"), val = int32(-1)]; + fp16 const_173_promoted_to_fp16 = const()[name = string("const_173_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4484_cast_fp16 = mul(x = hidden_states_69_cast_fp16, y = const_173_promoted_to_fp16)[name = string("op_4484_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_4482, interleave = input_81_interleave_0, values = (hidden_states_69_cast_fp16, var_4484_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_4479_to_fp16 = const()[name = string("op_4479_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4479_to_fp16, x = input_81_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; + tensor var_4498_to_fp16 = const()[name = string("op_4498_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457037376)))]; + tensor hidden_states_71_cast_fp16 = mul(x = normed_99_cast_fp16, y = var_4498_to_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor var_4509 = const()[name = string("op_4509"), val = tensor([0, 2, 1])]; + tensor var_4512_axes_0 = const()[name = string("op_4512_axes_0"), val = tensor([2])]; + tensor var_4510_cast_fp16 = transpose(perm = var_4509, x = hidden_states_71_cast_fp16)[name = string("transpose_199")]; + tensor var_4512_cast_fp16 = expand_dims(axes = var_4512_axes_0, x = var_4510_cast_fp16)[name = string("op_4512_cast_fp16")]; + string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; + tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; + tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; + int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; + tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_4512_cast_fp16)[name = string("query_states_33")]; + string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; + tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; + tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; + int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; + tensor key_states_41 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_4512_cast_fp16)[name = string("key_states_41")]; + string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; + tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; + tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; + int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; + tensor value_states_33 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_4512_cast_fp16)[name = string("value_states_33")]; + tensor var_4554 = const()[name = string("op_4554"), val = tensor([1, 4, 256, 64])]; + tensor var_4555 = reshape(shape = var_4554, x = query_states_33)[name = string("op_4555")]; + tensor var_4560 = const()[name = string("op_4560"), val = tensor([0, 1, 3, 2])]; + tensor var_4565 = const()[name = string("op_4565"), val = tensor([1, 1, 256, 64])]; + tensor var_4566 = reshape(shape = var_4565, x = key_states_41)[name = string("op_4566")]; + tensor var_4571 = const()[name = string("op_4571"), val = tensor([0, 1, 3, 2])]; + tensor var_4576 = const()[name = string("op_4576"), val = tensor([1, 1, 256, 64])]; + tensor var_4577 = reshape(shape = var_4576, x = value_states_33)[name = string("op_4577")]; + tensor var_4582 = const()[name = string("op_4582"), val = tensor([0, 1, 3, 2])]; + int32 var_4593 = const()[name = string("op_4593"), val = int32(-1)]; + fp16 const_178_promoted = const()[name = string("const_178_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_73 = transpose(perm = var_4560, x = var_4555)[name = string("transpose_198")]; + tensor var_4595 = mul(x = hidden_states_73, y = const_178_promoted)[name = string("op_4595")]; + bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; + tensor input_85 = concat(axis = var_4593, interleave = input_85_interleave_0, values = (hidden_states_73, var_4595))[name = string("input_85")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_4590_to_fp16 = const()[name = string("op_4590_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4590_to_fp16, x = input_85)[name = string("normed_101_cast_fp16")]; + tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; + tensor var_4609_to_fp16 = const()[name = string("op_4609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457039744)))]; + tensor q_9_cast_fp16 = mul(x = normed_103, y = var_4609_to_fp16)[name = string("q_9_cast_fp16")]; + int32 var_4620 = const()[name = string("op_4620"), val = int32(-1)]; + fp16 const_182_promoted = const()[name = string("const_182_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_75 = transpose(perm = var_4571, x = var_4566)[name = string("transpose_197")]; + tensor var_4622 = mul(x = hidden_states_75, y = const_182_promoted)[name = string("op_4622")]; + bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; + tensor input_87 = concat(axis = var_4620, interleave = input_87_interleave_0, values = (hidden_states_75, var_4622))[name = string("input_87")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4617_to_fp16, x = input_87)[name = string("normed_105_cast_fp16")]; + tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; + tensor var_4636_to_fp16 = const()[name = string("op_4636_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457040320)))]; + tensor k_9_cast_fp16 = mul(x = normed_107, y = var_4636_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_4650_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_5)[name = string("op_4650_cast_fp16")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; + fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4671_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_188_promoted_to_fp16)[name = string("op_4671_cast_fp16")]; + int32 var_4673 = const()[name = string("op_4673"), val = int32(-1)]; + bool var_4674_interleave_0 = const()[name = string("op_4674_interleave_0"), val = bool(false)]; + tensor var_4674_cast_fp16 = concat(axis = var_4673, interleave = var_4674_interleave_0, values = (var_4671_cast_fp16, x1_17_cast_fp16))[name = string("op_4674_cast_fp16")]; + tensor var_4675_cast_fp16 = mul(x = var_4674_cast_fp16, y = sin_5)[name = string("op_4675_cast_fp16")]; + tensor query_states_35_cast_fp16 = add(x = var_4650_cast_fp16, y = var_4675_cast_fp16)[name = string("query_states_35_cast_fp16")]; + tensor var_4678_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_5)[name = string("op_4678_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; + fp16 const_191_promoted_to_fp16 = const()[name = string("const_191_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4699_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_191_promoted_to_fp16)[name = string("op_4699_cast_fp16")]; + int32 var_4701 = const()[name = string("op_4701"), val = int32(-1)]; + bool var_4702_interleave_0 = const()[name = string("op_4702_interleave_0"), val = bool(false)]; + tensor var_4702_cast_fp16 = concat(axis = var_4701, interleave = var_4702_interleave_0, values = (var_4699_cast_fp16, x1_19_cast_fp16))[name = string("op_4702_cast_fp16")]; + tensor var_4703_cast_fp16 = mul(x = var_4702_cast_fp16, y = sin_5)[name = string("op_4703_cast_fp16")]; + tensor key_states_43_cast_fp16 = add(x = var_4678_cast_fp16, y = var_4703_cast_fp16)[name = string("key_states_43_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, end_pos_1, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_9_stride_0, update = key_states_43_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_112_write_state")]; + tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_112")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([26])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([27])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, end_pos_1, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_35 = transpose(perm = var_4582, x = var_4577)[name = string("transpose_196")]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_10_stride_0, update = value_states_35, x = coreml_update_state_60)[name = string("model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_113_write_state")]; + tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_113")]; + tensor var_4802_begin_0 = const()[name = string("op_4802_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_4802_end_0 = const()[name = string("op_4802_end_0"), val = tensor([5, 1, 512, 256])]; + tensor var_4802_end_mask_0 = const()[name = string("op_4802_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4802_cast_fp16 = slice_by_index(begin = var_4802_begin_0, end = var_4802_end_0, end_mask = var_4802_end_mask_0, x = coreml_update_state_61)[name = string("op_4802_cast_fp16")]; + tensor var_4809_begin_0 = const()[name = string("op_4809_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor var_4809_end_0 = const()[name = string("op_4809_end_0"), val = tensor([27, 1, 512, 256])]; + tensor var_4809_end_mask_0 = const()[name = string("op_4809_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4809_cast_fp16 = slice_by_index(begin = var_4809_begin_0, end = var_4809_end_0, end_mask = var_4809_end_mask_0, x = coreml_update_state_61)[name = string("op_4809_cast_fp16")]; + tensor var_4848 = const()[name = string("op_4848"), val = tensor([1, 4, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_4848, x = var_4802_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_4868 = const()[name = string("op_4868"), val = tensor([1, 4, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_4868, x = var_4809_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_4895_transpose_x_0 = const()[name = string("op_4895_transpose_x_0"), val = bool(false)]; + bool var_4895_transpose_y_0 = const()[name = string("op_4895_transpose_y_0"), val = bool(true)]; + tensor var_4895 = matmul(transpose_x = var_4895_transpose_x_0, transpose_y = var_4895_transpose_y_0, x = query_states_35_cast_fp16, y = x_69_cast_fp16)[name = string("op_4895")]; + fp16 var_4896_to_fp16 = const()[name = string("op_4896_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_17_cast_fp16 = mul(x = var_4895, y = var_4896_to_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor attn_weights_19_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = mask_slice_1)[name = string("attn_weights_19_cast_fp16")]; + int32 var_4931 = const()[name = string("op_4931"), val = int32(-1)]; + tensor var_4933_cast_fp16 = softmax(axis = var_4931, x = attn_weights_19_cast_fp16)[name = string("op_4933_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([4, 64, 512])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_4933_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([4, 512, 256])]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_75_cast_fp16)[name = string("reshape_13_cast_fp16")]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 4, 64, 256])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor var_4945_perm_0 = const()[name = string("op_4945_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4964 = const()[name = string("op_4964"), val = tensor([1, 64, 1024])]; + tensor var_4945_cast_fp16 = transpose(perm = var_4945_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_195")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_4964, x = var_4945_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_4969 = const()[name = string("op_4969"), val = tensor([0, 2, 1])]; + string var_4985_pad_type_0 = const()[name = string("op_4985_pad_type_0"), val = string("valid")]; + int32 var_4985_groups_0 = const()[name = string("op_4985_groups_0"), val = int32(1)]; + tensor var_4985_strides_0 = const()[name = string("op_4985_strides_0"), val = tensor([1])]; + tensor var_4985_pad_0 = const()[name = string("op_4985_pad_0"), val = tensor([0, 0])]; + tensor var_4985_dilations_0 = const()[name = string("op_4985_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457040896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457925696))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4970_cast_fp16 = transpose(perm = var_4969, x = attn_output_45_cast_fp16)[name = string("transpose_194")]; + tensor var_4985_cast_fp16 = conv(dilations = var_4985_dilations_0, groups = var_4985_groups_0, pad = var_4985_pad_0, pad_type = var_4985_pad_type_0, strides = var_4985_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_4970_cast_fp16)[name = string("op_4985_cast_fp16")]; + tensor var_4989 = const()[name = string("op_4989"), val = tensor([0, 2, 1])]; + int32 var_5000 = const()[name = string("op_5000"), val = int32(-1)]; + fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_77_cast_fp16 = transpose(perm = var_4989, x = var_4985_cast_fp16)[name = string("transpose_193")]; + tensor var_5002_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = const_203_promoted_to_fp16)[name = string("op_5002_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_5000, interleave = input_91_interleave_0, values = (hidden_states_77_cast_fp16, var_5002_cast_fp16))[name = string("input_91_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_4997_to_fp16 = const()[name = string("op_4997_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4997_to_fp16, x = input_91_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; + tensor var_5016_to_fp16 = const()[name = string("op_5016_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457962624)))]; + tensor attn_output_49_cast_fp16 = mul(x = normed_111_cast_fp16, y = var_5016_to_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + int32 var_5029 = const()[name = string("op_5029"), val = int32(-1)]; + fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5031_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = const_207_promoted_to_fp16)[name = string("op_5031_cast_fp16")]; + bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; + tensor input_93_cast_fp16 = concat(axis = var_5029, interleave = input_93_interleave_0, values = (hidden_states_79_cast_fp16, var_5031_cast_fp16))[name = string("input_93_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_5026_to_fp16 = const()[name = string("op_5026_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_5026_to_fp16, x = input_93_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; + tensor var_5045_to_fp16 = const()[name = string("op_5045_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457964992)))]; + tensor x_77_cast_fp16 = mul(x = normed_115_cast_fp16, y = var_5045_to_fp16)[name = string("x_77_cast_fp16")]; + tensor var_5057 = const()[name = string("op_5057"), val = tensor([0, 2, 1])]; + tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; + tensor var_5058_cast_fp16 = transpose(perm = var_5057, x = x_77_cast_fp16)[name = string("transpose_192")]; + tensor input_95_cast_fp16 = expand_dims(axes = input_95_axes_0, x = var_5058_cast_fp16)[name = string("input_95_cast_fp16")]; + string x_79_pad_type_0 = const()[name = string("x_79_pad_type_0"), val = string("valid")]; + tensor x_79_strides_0 = const()[name = string("x_79_strides_0"), val = tensor([1, 1])]; + tensor x_79_pad_0 = const()[name = string("x_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_79_dilations_0 = const()[name = string("x_79_dilations_0"), val = tensor([1, 1])]; + int32 x_79_groups_0 = const()[name = string("x_79_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457967360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463939392))))[name = string("model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_79_cast_fp16 = conv(dilations = x_79_dilations_0, groups = x_79_groups_0, pad = x_79_pad_0, pad_type = x_79_pad_type_0, strides = x_79_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("x_79_cast_fp16")]; + string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; + tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; + tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; + int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464160640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470132672))))[name = string("model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_9_cast_fp16 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("b_9_cast_fp16")]; + string var_5083_mode_0 = const()[name = string("op_5083_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_5083_cast_fp16 = gelu(mode = var_5083_mode_0, x = x_79_cast_fp16)[name = string("op_5083_cast_fp16")]; + tensor input_97_cast_fp16 = mul(x = var_5083_cast_fp16, y = b_9_cast_fp16)[name = string("input_97_cast_fp16")]; + string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; + tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; + tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; + int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470353920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476325952))))[name = string("model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_9_cast_fp16 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_97_cast_fp16)[name = string("e_9_cast_fp16")]; + tensor var_5091_axes_0 = const()[name = string("op_5091_axes_0"), val = tensor([2])]; + tensor var_5091_cast_fp16 = squeeze(axes = var_5091_axes_0, x = e_9_cast_fp16)[name = string("op_5091_cast_fp16")]; + tensor var_5092 = const()[name = string("op_5092"), val = tensor([0, 2, 1])]; + int32 var_5103 = const()[name = string("op_5103"), val = int32(-1)]; + fp16 const_211_promoted_to_fp16 = const()[name = string("const_211_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_81_cast_fp16 = transpose(perm = var_5092, x = var_5091_cast_fp16)[name = string("transpose_191")]; + tensor var_5105_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_211_promoted_to_fp16)[name = string("op_5105_cast_fp16")]; + bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; + tensor input_99_cast_fp16 = concat(axis = var_5103, interleave = input_99_interleave_0, values = (hidden_states_81_cast_fp16, var_5105_cast_fp16))[name = string("input_99_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_5100_to_fp16 = const()[name = string("op_5100_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_5100_to_fp16, x = input_99_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_119_cast_fp16 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119_cast_fp16")]; + tensor var_5119_to_fp16 = const()[name = string("op_5119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476362880)))]; + tensor hidden_states_83_cast_fp16 = mul(x = normed_119_cast_fp16, y = var_5119_to_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor hidden_states_85_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; + int32 var_5136_axis_0 = const()[name = string("op_5136_axis_0"), val = int32(1)]; + int32 var_5136_batch_dims_0 = const()[name = string("op_5136_batch_dims_0"), val = int32(0)]; + bool var_5136_validate_indices_0 = const()[name = string("op_5136_validate_indices_0"), val = bool(false)]; + tensor var_5128_to_fp16 = const()[name = string("op_5128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480559616)))]; + tensor var_5136_cast_fp16_cast_uint16 = gather(axis = var_5136_axis_0, batch_dims = var_5136_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_5136_validate_indices_0, x = var_5128_to_fp16)[name = string("op_5136_cast_fp16_cast_uint16")]; + tensor var_5140 = const()[name = string("op_5140"), val = tensor([1, 64, 1, 256])]; + tensor cos_31_cast_fp16 = reshape(shape = var_5140, x = var_5136_cast_fp16_cast_uint16)[name = string("cos_31_cast_fp16")]; + int32 var_5150_axis_0 = const()[name = string("op_5150_axis_0"), val = int32(1)]; + int32 var_5150_batch_dims_0 = const()[name = string("op_5150_batch_dims_0"), val = int32(0)]; + bool var_5150_validate_indices_0 = const()[name = string("op_5150_validate_indices_0"), val = bool(false)]; + tensor var_5142_to_fp16 = const()[name = string("op_5142_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476365248)))]; + tensor var_5150_cast_fp16_cast_uint16 = gather(axis = var_5150_axis_0, batch_dims = var_5150_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_5150_validate_indices_0, x = var_5142_to_fp16)[name = string("op_5150_cast_fp16_cast_uint16")]; + tensor var_5154 = const()[name = string("op_5154"), val = tensor([1, 64, 1, 256])]; + tensor sin_31_cast_fp16 = reshape(shape = var_5154, x = var_5150_cast_fp16_cast_uint16)[name = string("sin_31_cast_fp16")]; + int32 var_5175 = const()[name = string("op_5175"), val = int32(-1)]; + fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5177_cast_fp16 = mul(x = hidden_states_85_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_5177_cast_fp16")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101_cast_fp16 = concat(axis = var_5175, interleave = input_101_interleave_0, values = (hidden_states_85_cast_fp16, var_5177_cast_fp16))[name = string("input_101_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_5172_to_fp16 = const()[name = string("op_5172_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_5172_to_fp16, x = input_101_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_123_cast_fp16 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123_cast_fp16")]; + tensor var_5191_to_fp16 = const()[name = string("op_5191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484753984)))]; + tensor hidden_states_87_cast_fp16 = mul(x = normed_123_cast_fp16, y = var_5191_to_fp16)[name = string("hidden_states_87_cast_fp16")]; + tensor var_5202 = const()[name = string("op_5202"), val = tensor([0, 2, 1])]; + tensor var_5205_axes_0 = const()[name = string("op_5205_axes_0"), val = tensor([2])]; + tensor var_5203_cast_fp16 = transpose(perm = var_5202, x = hidden_states_87_cast_fp16)[name = string("transpose_190")]; + tensor var_5205_cast_fp16 = expand_dims(axes = var_5205_axes_0, x = var_5203_cast_fp16)[name = string("op_5205_cast_fp16")]; + string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; + tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; + tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; + int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; + tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_5205_cast_fp16)[name = string("query_states_41")]; + string key_states_51_pad_type_0 = const()[name = string("key_states_51_pad_type_0"), val = string("valid")]; + tensor key_states_51_strides_0 = const()[name = string("key_states_51_strides_0"), val = tensor([1, 1])]; + tensor key_states_51_pad_0 = const()[name = string("key_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_51_dilations_0 = const()[name = string("key_states_51_dilations_0"), val = tensor([1, 1])]; + int32 key_states_51_groups_0 = const()[name = string("key_states_51_groups_0"), val = int32(1)]; + tensor key_states_51 = conv(dilations = key_states_51_dilations_0, groups = key_states_51_groups_0, pad = key_states_51_pad_0, pad_type = key_states_51_pad_type_0, strides = key_states_51_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_5205_cast_fp16)[name = string("key_states_51")]; + string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; + tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; + tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; + int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; + tensor value_states_41 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_5205_cast_fp16)[name = string("value_states_41")]; + tensor var_5247 = const()[name = string("op_5247"), val = tensor([1, 4, 256, 64])]; + tensor var_5248 = reshape(shape = var_5247, x = query_states_41)[name = string("op_5248")]; + tensor var_5253 = const()[name = string("op_5253"), val = tensor([0, 1, 3, 2])]; + tensor var_5258 = const()[name = string("op_5258"), val = tensor([1, 1, 256, 64])]; + tensor var_5259 = reshape(shape = var_5258, x = key_states_51)[name = string("op_5259")]; + tensor var_5264 = const()[name = string("op_5264"), val = tensor([0, 1, 3, 2])]; + tensor var_5269 = const()[name = string("op_5269"), val = tensor([1, 1, 256, 64])]; + tensor var_5270 = reshape(shape = var_5269, x = value_states_41)[name = string("op_5270")]; + tensor var_5275 = const()[name = string("op_5275"), val = tensor([0, 1, 3, 2])]; + int32 var_5286 = const()[name = string("op_5286"), val = int32(-1)]; + fp16 const_221_promoted = const()[name = string("const_221_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_89 = transpose(perm = var_5253, x = var_5248)[name = string("transpose_189")]; + tensor var_5288 = mul(x = hidden_states_89, y = const_221_promoted)[name = string("op_5288")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105 = concat(axis = var_5286, interleave = input_105_interleave_0, values = (hidden_states_89, var_5288))[name = string("input_105")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_5283_to_fp16 = const()[name = string("op_5283_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_5283_to_fp16, x = input_105)[name = string("normed_125_cast_fp16")]; + tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_127 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127")]; + tensor var_5302_to_fp16 = const()[name = string("op_5302_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484756352)))]; + tensor q_11_cast_fp16 = mul(x = normed_127, y = var_5302_to_fp16)[name = string("q_11_cast_fp16")]; + int32 var_5313 = const()[name = string("op_5313"), val = int32(-1)]; + fp16 const_225_promoted = const()[name = string("const_225_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_91 = transpose(perm = var_5264, x = var_5259)[name = string("transpose_188")]; + tensor var_5315 = mul(x = hidden_states_91, y = const_225_promoted)[name = string("op_5315")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107 = concat(axis = var_5313, interleave = input_107_interleave_0, values = (hidden_states_91, var_5315))[name = string("input_107")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_5310_to_fp16 = const()[name = string("op_5310_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_5310_to_fp16, x = input_107)[name = string("normed_129_cast_fp16")]; + tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_131 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131")]; + tensor var_5329_to_fp16 = const()[name = string("op_5329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484756928)))]; + tensor k_11_cast_fp16 = mul(x = normed_131, y = var_5329_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_5335 = const()[name = string("op_5335"), val = tensor([0, 2, 1, 3])]; + tensor var_5341 = const()[name = string("op_5341"), val = tensor([0, 2, 1, 3])]; + tensor cos_35 = transpose(perm = var_5335, x = cos_31_cast_fp16)[name = string("transpose_187")]; + tensor var_5343_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_35)[name = string("op_5343_cast_fp16")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; + fp16 const_231_promoted_to_fp16 = const()[name = string("const_231_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5364_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_231_promoted_to_fp16)[name = string("op_5364_cast_fp16")]; + int32 var_5366 = const()[name = string("op_5366"), val = int32(-1)]; + bool var_5367_interleave_0 = const()[name = string("op_5367_interleave_0"), val = bool(false)]; + tensor var_5367_cast_fp16 = concat(axis = var_5366, interleave = var_5367_interleave_0, values = (var_5364_cast_fp16, x1_21_cast_fp16))[name = string("op_5367_cast_fp16")]; + tensor sin_35 = transpose(perm = var_5341, x = sin_31_cast_fp16)[name = string("transpose_186")]; + tensor var_5368_cast_fp16 = mul(x = var_5367_cast_fp16, y = sin_35)[name = string("op_5368_cast_fp16")]; + tensor query_states_43_cast_fp16 = add(x = var_5343_cast_fp16, y = var_5368_cast_fp16)[name = string("query_states_43_cast_fp16")]; + tensor var_5371_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_35)[name = string("op_5371_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; + fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5392_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_5392_cast_fp16")]; + int32 var_5394 = const()[name = string("op_5394"), val = int32(-1)]; + bool var_5395_interleave_0 = const()[name = string("op_5395_interleave_0"), val = bool(false)]; + tensor var_5395_cast_fp16 = concat(axis = var_5394, interleave = var_5395_interleave_0, values = (var_5392_cast_fp16, x1_23_cast_fp16))[name = string("op_5395_cast_fp16")]; + tensor var_5396_cast_fp16 = mul(x = var_5395_cast_fp16, y = sin_35)[name = string("op_5396_cast_fp16")]; + tensor key_states_53_cast_fp16 = add(x = var_5371_cast_fp16, y = var_5396_cast_fp16)[name = string("key_states_53_cast_fp16")]; + tensor read_state_1 = read_state(input = model_model_kv_cache_global)[name = string("read_state_1")]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_1_stride_0, update = key_states_53_cast_fp16, x = read_state_1)[name = string("model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_114_write_state")]; + tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_114")]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_43 = transpose(perm = var_5275, x = var_5270)[name = string("transpose_185")]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_2_stride_0, update = value_states_43, x = coreml_update_state_62)[name = string("model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_115_write_state")]; + tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_115")]; + tensor var_5495_begin_0 = const()[name = string("op_5495_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5495_end_0 = const()[name = string("op_5495_end_0"), val = tensor([1, 1, 4096, 256])]; + tensor var_5495_end_mask_0 = const()[name = string("op_5495_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5495_cast_fp16 = slice_by_index(begin = var_5495_begin_0, end = var_5495_end_0, end_mask = var_5495_end_mask_0, x = coreml_update_state_63)[name = string("op_5495_cast_fp16")]; + tensor var_5502_begin_0 = const()[name = string("op_5502_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_5502_end_0 = const()[name = string("op_5502_end_0"), val = tensor([5, 1, 4096, 256])]; + tensor var_5502_end_mask_0 = const()[name = string("op_5502_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5502_cast_fp16 = slice_by_index(begin = var_5502_begin_0, end = var_5502_end_0, end_mask = var_5502_end_mask_0, x = coreml_update_state_63)[name = string("op_5502_cast_fp16")]; + tensor var_5541 = const()[name = string("op_5541"), val = tensor([1, 4, 1, 1])]; + tensor x_85_cast_fp16 = tile(reps = var_5541, x = var_5495_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_5561 = const()[name = string("op_5561"), val = tensor([1, 4, 1, 1])]; + tensor x_91_cast_fp16 = tile(reps = var_5561, x = var_5502_cast_fp16)[name = string("x_91_cast_fp16")]; + bool var_5588_transpose_x_0 = const()[name = string("op_5588_transpose_x_0"), val = bool(false)]; + bool var_5588_transpose_y_0 = const()[name = string("op_5588_transpose_y_0"), val = bool(true)]; + tensor var_5588 = matmul(transpose_x = var_5588_transpose_x_0, transpose_y = var_5588_transpose_y_0, x = query_states_43_cast_fp16, y = x_85_cast_fp16)[name = string("op_5588")]; + fp16 var_5589_to_fp16 = const()[name = string("op_5589_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_21_cast_fp16 = mul(x = var_5588, y = var_5589_to_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor attn_weights_23_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("attn_weights_23_cast_fp16")]; + int32 var_5624 = const()[name = string("op_5624"), val = int32(-1)]; + tensor var_5626_cast_fp16 = softmax(axis = var_5624, x = attn_weights_23_cast_fp16)[name = string("op_5626_cast_fp16")]; + tensor concat_102 = const()[name = string("concat_102"), val = tensor([4, 64, 4096])]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_5626_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor concat_103 = const()[name = string("concat_103"), val = tensor([4, 4096, 256])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_91_cast_fp16)[name = string("reshape_16_cast_fp16")]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 4, 64, 256])]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor var_5638_perm_0 = const()[name = string("op_5638_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5657 = const()[name = string("op_5657"), val = tensor([1, 64, 1024])]; + tensor var_5638_cast_fp16 = transpose(perm = var_5638_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_184")]; + tensor attn_output_55_cast_fp16 = reshape(shape = var_5657, x = var_5638_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_5662 = const()[name = string("op_5662"), val = tensor([0, 2, 1])]; + string var_5678_pad_type_0 = const()[name = string("op_5678_pad_type_0"), val = string("valid")]; + int32 var_5678_groups_0 = const()[name = string("op_5678_groups_0"), val = int32(1)]; + tensor var_5678_strides_0 = const()[name = string("op_5678_strides_0"), val = tensor([1])]; + tensor var_5678_pad_0 = const()[name = string("op_5678_pad_0"), val = tensor([0, 0])]; + tensor var_5678_dilations_0 = const()[name = string("op_5678_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484757504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485642304))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5663_cast_fp16 = transpose(perm = var_5662, x = attn_output_55_cast_fp16)[name = string("transpose_183")]; + tensor var_5678_cast_fp16 = conv(dilations = var_5678_dilations_0, groups = var_5678_groups_0, pad = var_5678_pad_0, pad_type = var_5678_pad_type_0, strides = var_5678_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_5663_cast_fp16)[name = string("op_5678_cast_fp16")]; + tensor var_5682 = const()[name = string("op_5682"), val = tensor([0, 2, 1])]; + int32 var_5693 = const()[name = string("op_5693"), val = int32(-1)]; + fp16 const_246_promoted_to_fp16 = const()[name = string("const_246_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_93_cast_fp16 = transpose(perm = var_5682, x = var_5678_cast_fp16)[name = string("transpose_182")]; + tensor var_5695_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = const_246_promoted_to_fp16)[name = string("op_5695_cast_fp16")]; + bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; + tensor input_111_cast_fp16 = concat(axis = var_5693, interleave = input_111_interleave_0, values = (hidden_states_93_cast_fp16, var_5695_cast_fp16))[name = string("input_111_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_5690_to_fp16 = const()[name = string("op_5690_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5690_to_fp16, x = input_111_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_135_cast_fp16 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135_cast_fp16")]; + tensor var_5709_to_fp16 = const()[name = string("op_5709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485679232)))]; + tensor attn_output_59_cast_fp16 = mul(x = normed_135_cast_fp16, y = var_5709_to_fp16)[name = string("attn_output_59_cast_fp16")]; + tensor hidden_states_95_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; + int32 var_5722 = const()[name = string("op_5722"), val = int32(-1)]; + fp16 const_250_promoted_to_fp16 = const()[name = string("const_250_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5724_cast_fp16 = mul(x = hidden_states_95_cast_fp16, y = const_250_promoted_to_fp16)[name = string("op_5724_cast_fp16")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113_cast_fp16 = concat(axis = var_5722, interleave = input_113_interleave_0, values = (hidden_states_95_cast_fp16, var_5724_cast_fp16))[name = string("input_113_cast_fp16")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_5719_to_fp16 = const()[name = string("op_5719_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5719_to_fp16, x = input_113_cast_fp16)[name = string("normed_137_cast_fp16")]; + tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_139_cast_fp16 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139_cast_fp16")]; + tensor var_5738_to_fp16 = const()[name = string("op_5738_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485681600)))]; + tensor x_93_cast_fp16 = mul(x = normed_139_cast_fp16, y = var_5738_to_fp16)[name = string("x_93_cast_fp16")]; + tensor var_5750 = const()[name = string("op_5750"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_5751_cast_fp16 = transpose(perm = var_5750, x = x_93_cast_fp16)[name = string("transpose_181")]; + tensor input_115_cast_fp16 = expand_dims(axes = input_115_axes_0, x = var_5751_cast_fp16)[name = string("input_115_cast_fp16")]; + string x_95_pad_type_0 = const()[name = string("x_95_pad_type_0"), val = string("valid")]; + tensor x_95_strides_0 = const()[name = string("x_95_strides_0"), val = tensor([1, 1])]; + tensor x_95_pad_0 = const()[name = string("x_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_95_dilations_0 = const()[name = string("x_95_dilations_0"), val = tensor([1, 1])]; + int32 x_95_groups_0 = const()[name = string("x_95_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485683968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491656000))))[name = string("model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_95_cast_fp16 = conv(dilations = x_95_dilations_0, groups = x_95_groups_0, pad = x_95_pad_0, pad_type = x_95_pad_type_0, strides = x_95_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("x_95_cast_fp16")]; + string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; + tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; + tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; + int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491877248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497849280))))[name = string("model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_11_cast_fp16 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("b_11_cast_fp16")]; + string var_5776_mode_0 = const()[name = string("op_5776_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_5776_cast_fp16 = gelu(mode = var_5776_mode_0, x = x_95_cast_fp16)[name = string("op_5776_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = var_5776_cast_fp16, y = b_11_cast_fp16)[name = string("input_117_cast_fp16")]; + string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; + tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; + tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; + int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498070528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504042560))))[name = string("model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_11_cast_fp16 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("e_11_cast_fp16")]; + tensor var_5784_axes_0 = const()[name = string("op_5784_axes_0"), val = tensor([2])]; + tensor var_5784_cast_fp16 = squeeze(axes = var_5784_axes_0, x = e_11_cast_fp16)[name = string("op_5784_cast_fp16")]; + tensor var_5785 = const()[name = string("op_5785"), val = tensor([0, 2, 1])]; + int32 var_5796 = const()[name = string("op_5796"), val = int32(-1)]; + fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_97_cast_fp16 = transpose(perm = var_5785, x = var_5784_cast_fp16)[name = string("transpose_180")]; + tensor var_5798_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_5798_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_5796, interleave = input_119_interleave_0, values = (hidden_states_97_cast_fp16, var_5798_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_5793_to_fp16 = const()[name = string("op_5793_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_5793_to_fp16, x = input_119_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; + tensor var_5812_to_fp16 = const()[name = string("op_5812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504079488)))]; + tensor hidden_states_99_cast_fp16 = mul(x = normed_143_cast_fp16, y = var_5812_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + int32 var_5866 = const()[name = string("op_5866"), val = int32(-1)]; + fp16 const_259_promoted_to_fp16 = const()[name = string("const_259_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5868_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_259_promoted_to_fp16)[name = string("op_5868_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_5866, interleave = input_121_interleave_0, values = (hidden_states_101_cast_fp16, var_5868_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_5863_to_fp16 = const()[name = string("op_5863_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_5863_to_fp16, x = input_121_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; + tensor var_5882_to_fp16 = const()[name = string("op_5882_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504081856)))]; + tensor hidden_states_103_cast_fp16 = mul(x = normed_147_cast_fp16, y = var_5882_to_fp16)[name = string("hidden_states_103_cast_fp16")]; + tensor var_5893 = const()[name = string("op_5893"), val = tensor([0, 2, 1])]; + tensor var_5896_axes_0 = const()[name = string("op_5896_axes_0"), val = tensor([2])]; + tensor var_5894_cast_fp16 = transpose(perm = var_5893, x = hidden_states_103_cast_fp16)[name = string("transpose_179")]; + tensor var_5896_cast_fp16 = expand_dims(axes = var_5896_axes_0, x = var_5894_cast_fp16)[name = string("op_5896_cast_fp16")]; + string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; + tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; + tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; + int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; + tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_5896_cast_fp16)[name = string("query_states_49")]; + string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; + tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; + tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; + int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; + tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_5896_cast_fp16)[name = string("key_states_61")]; + string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; + tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; + tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; + int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; + tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_5896_cast_fp16)[name = string("value_states_49")]; + tensor var_5938 = const()[name = string("op_5938"), val = tensor([1, 4, 256, 64])]; + tensor var_5939 = reshape(shape = var_5938, x = query_states_49)[name = string("op_5939")]; + tensor var_5944 = const()[name = string("op_5944"), val = tensor([0, 1, 3, 2])]; + tensor var_5949 = const()[name = string("op_5949"), val = tensor([1, 1, 256, 64])]; + tensor var_5950 = reshape(shape = var_5949, x = key_states_61)[name = string("op_5950")]; + tensor var_5955 = const()[name = string("op_5955"), val = tensor([0, 1, 3, 2])]; + tensor var_5960 = const()[name = string("op_5960"), val = tensor([1, 1, 256, 64])]; + tensor var_5961 = reshape(shape = var_5960, x = value_states_49)[name = string("op_5961")]; + tensor var_5966 = const()[name = string("op_5966"), val = tensor([0, 1, 3, 2])]; + int32 var_5977 = const()[name = string("op_5977"), val = int32(-1)]; + fp16 const_264_promoted = const()[name = string("const_264_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_105 = transpose(perm = var_5944, x = var_5939)[name = string("transpose_178")]; + tensor var_5979 = mul(x = hidden_states_105, y = const_264_promoted)[name = string("op_5979")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_5977, interleave = input_125_interleave_0, values = (hidden_states_105, var_5979))[name = string("input_125")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_5974_to_fp16 = const()[name = string("op_5974_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_5974_to_fp16, x = input_125)[name = string("normed_149_cast_fp16")]; + tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; + tensor var_5993_to_fp16 = const()[name = string("op_5993_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504084224)))]; + tensor q_13_cast_fp16 = mul(x = normed_151, y = var_5993_to_fp16)[name = string("q_13_cast_fp16")]; + int32 var_6004 = const()[name = string("op_6004"), val = int32(-1)]; + fp16 const_268_promoted = const()[name = string("const_268_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_107 = transpose(perm = var_5955, x = var_5950)[name = string("transpose_177")]; + tensor var_6006 = mul(x = hidden_states_107, y = const_268_promoted)[name = string("op_6006")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127 = concat(axis = var_6004, interleave = input_127_interleave_0, values = (hidden_states_107, var_6006))[name = string("input_127")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_6001_to_fp16 = const()[name = string("op_6001_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_6001_to_fp16, x = input_127)[name = string("normed_153_cast_fp16")]; + tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; + tensor var_6020_to_fp16 = const()[name = string("op_6020_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504084800)))]; + tensor k_13_cast_fp16 = mul(x = normed_155, y = var_6020_to_fp16)[name = string("k_13_cast_fp16")]; + tensor var_6034_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_5)[name = string("op_6034_cast_fp16")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; + fp16 const_274_promoted_to_fp16 = const()[name = string("const_274_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6055_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_274_promoted_to_fp16)[name = string("op_6055_cast_fp16")]; + int32 var_6057 = const()[name = string("op_6057"), val = int32(-1)]; + bool var_6058_interleave_0 = const()[name = string("op_6058_interleave_0"), val = bool(false)]; + tensor var_6058_cast_fp16 = concat(axis = var_6057, interleave = var_6058_interleave_0, values = (var_6055_cast_fp16, x1_25_cast_fp16))[name = string("op_6058_cast_fp16")]; + tensor var_6059_cast_fp16 = mul(x = var_6058_cast_fp16, y = sin_5)[name = string("op_6059_cast_fp16")]; + tensor query_states_51_cast_fp16 = add(x = var_6034_cast_fp16, y = var_6059_cast_fp16)[name = string("query_states_51_cast_fp16")]; + tensor var_6062_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_5)[name = string("op_6062_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; + fp16 const_277_promoted_to_fp16 = const()[name = string("const_277_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6083_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_277_promoted_to_fp16)[name = string("op_6083_cast_fp16")]; + int32 var_6085 = const()[name = string("op_6085"), val = int32(-1)]; + bool var_6086_interleave_0 = const()[name = string("op_6086_interleave_0"), val = bool(false)]; + tensor var_6086_cast_fp16 = concat(axis = var_6085, interleave = var_6086_interleave_0, values = (var_6083_cast_fp16, x1_27_cast_fp16))[name = string("op_6086_cast_fp16")]; + tensor var_6087_cast_fp16 = mul(x = var_6086_cast_fp16, y = sin_5)[name = string("op_6087_cast_fp16")]; + tensor key_states_63_cast_fp16 = add(x = var_6062_cast_fp16, y = var_6087_cast_fp16)[name = string("key_states_63_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([5])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([6])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, end_pos_1, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_11_stride_0, update = key_states_63_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_116_write_state")]; + tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_116")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([27])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([28])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, end_pos_1, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_51 = transpose(perm = var_5966, x = var_5961)[name = string("transpose_176")]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_12_stride_0, update = value_states_51, x = coreml_update_state_64)[name = string("model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_117_write_state")]; + tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_117")]; + tensor var_6186_begin_0 = const()[name = string("op_6186_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_6186_end_0 = const()[name = string("op_6186_end_0"), val = tensor([6, 1, 512, 256])]; + tensor var_6186_end_mask_0 = const()[name = string("op_6186_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6186_cast_fp16 = slice_by_index(begin = var_6186_begin_0, end = var_6186_end_0, end_mask = var_6186_end_mask_0, x = coreml_update_state_65)[name = string("op_6186_cast_fp16")]; + tensor var_6193_begin_0 = const()[name = string("op_6193_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor var_6193_end_0 = const()[name = string("op_6193_end_0"), val = tensor([28, 1, 512, 256])]; + tensor var_6193_end_mask_0 = const()[name = string("op_6193_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6193_cast_fp16 = slice_by_index(begin = var_6193_begin_0, end = var_6193_end_0, end_mask = var_6193_end_mask_0, x = coreml_update_state_65)[name = string("op_6193_cast_fp16")]; + tensor var_6232 = const()[name = string("op_6232"), val = tensor([1, 4, 1, 1])]; + tensor x_101_cast_fp16 = tile(reps = var_6232, x = var_6186_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_6252 = const()[name = string("op_6252"), val = tensor([1, 4, 1, 1])]; + tensor x_107_cast_fp16 = tile(reps = var_6252, x = var_6193_cast_fp16)[name = string("x_107_cast_fp16")]; + bool var_6279_transpose_x_0 = const()[name = string("op_6279_transpose_x_0"), val = bool(false)]; + bool var_6279_transpose_y_0 = const()[name = string("op_6279_transpose_y_0"), val = bool(true)]; + tensor var_6279 = matmul(transpose_x = var_6279_transpose_x_0, transpose_y = var_6279_transpose_y_0, x = query_states_51_cast_fp16, y = x_101_cast_fp16)[name = string("op_6279")]; + fp16 var_6280_to_fp16 = const()[name = string("op_6280_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_6279, y = var_6280_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = mask_slice_1)[name = string("attn_weights_27_cast_fp16")]; + int32 var_6315 = const()[name = string("op_6315"), val = int32(-1)]; + tensor var_6317_cast_fp16 = softmax(axis = var_6315, x = attn_weights_27_cast_fp16)[name = string("op_6317_cast_fp16")]; + tensor concat_120 = const()[name = string("concat_120"), val = tensor([4, 64, 512])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_6317_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor concat_121 = const()[name = string("concat_121"), val = tensor([4, 512, 256])]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_107_cast_fp16)[name = string("reshape_19_cast_fp16")]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 4, 64, 256])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor var_6329_perm_0 = const()[name = string("op_6329_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6348 = const()[name = string("op_6348"), val = tensor([1, 64, 1024])]; + tensor var_6329_cast_fp16 = transpose(perm = var_6329_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_175")]; + tensor attn_output_65_cast_fp16 = reshape(shape = var_6348, x = var_6329_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_6353 = const()[name = string("op_6353"), val = tensor([0, 2, 1])]; + string var_6369_pad_type_0 = const()[name = string("op_6369_pad_type_0"), val = string("valid")]; + int32 var_6369_groups_0 = const()[name = string("op_6369_groups_0"), val = int32(1)]; + tensor var_6369_strides_0 = const()[name = string("op_6369_strides_0"), val = tensor([1])]; + tensor var_6369_pad_0 = const()[name = string("op_6369_pad_0"), val = tensor([0, 0])]; + tensor var_6369_dilations_0 = const()[name = string("op_6369_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504085376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504970176))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6354_cast_fp16 = transpose(perm = var_6353, x = attn_output_65_cast_fp16)[name = string("transpose_174")]; + tensor var_6369_cast_fp16 = conv(dilations = var_6369_dilations_0, groups = var_6369_groups_0, pad = var_6369_pad_0, pad_type = var_6369_pad_type_0, strides = var_6369_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_6354_cast_fp16)[name = string("op_6369_cast_fp16")]; + tensor var_6373 = const()[name = string("op_6373"), val = tensor([0, 2, 1])]; + int32 var_6384 = const()[name = string("op_6384"), val = int32(-1)]; + fp16 const_289_promoted_to_fp16 = const()[name = string("const_289_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_109_cast_fp16 = transpose(perm = var_6373, x = var_6369_cast_fp16)[name = string("transpose_173")]; + tensor var_6386_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_289_promoted_to_fp16)[name = string("op_6386_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_6384, interleave = input_131_interleave_0, values = (hidden_states_109_cast_fp16, var_6386_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_6381_to_fp16 = const()[name = string("op_6381_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_6381_to_fp16, x = input_131_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; + tensor var_6400_to_fp16 = const()[name = string("op_6400_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505007104)))]; + tensor attn_output_69_cast_fp16 = mul(x = normed_159_cast_fp16, y = var_6400_to_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + int32 var_6413 = const()[name = string("op_6413"), val = int32(-1)]; + fp16 const_293_promoted_to_fp16 = const()[name = string("const_293_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6415_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_293_promoted_to_fp16)[name = string("op_6415_cast_fp16")]; + bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; + tensor input_133_cast_fp16 = concat(axis = var_6413, interleave = input_133_interleave_0, values = (hidden_states_111_cast_fp16, var_6415_cast_fp16))[name = string("input_133_cast_fp16")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_6410_to_fp16 = const()[name = string("op_6410_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_6410_to_fp16, x = input_133_cast_fp16)[name = string("normed_161_cast_fp16")]; + tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; + tensor var_6429_to_fp16 = const()[name = string("op_6429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505009472)))]; + tensor x_109_cast_fp16 = mul(x = normed_163_cast_fp16, y = var_6429_to_fp16)[name = string("x_109_cast_fp16")]; + tensor var_6441 = const()[name = string("op_6441"), val = tensor([0, 2, 1])]; + tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; + tensor var_6442_cast_fp16 = transpose(perm = var_6441, x = x_109_cast_fp16)[name = string("transpose_172")]; + tensor input_135_cast_fp16 = expand_dims(axes = input_135_axes_0, x = var_6442_cast_fp16)[name = string("input_135_cast_fp16")]; + string x_111_pad_type_0 = const()[name = string("x_111_pad_type_0"), val = string("valid")]; + tensor x_111_strides_0 = const()[name = string("x_111_strides_0"), val = tensor([1, 1])]; + tensor x_111_pad_0 = const()[name = string("x_111_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_111_dilations_0 = const()[name = string("x_111_dilations_0"), val = tensor([1, 1])]; + int32 x_111_groups_0 = const()[name = string("x_111_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505011840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510983872))))[name = string("model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_111_cast_fp16 = conv(dilations = x_111_dilations_0, groups = x_111_groups_0, pad = x_111_pad_0, pad_type = x_111_pad_type_0, strides = x_111_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("x_111_cast_fp16")]; + string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; + tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; + tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; + int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511205120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517177152))))[name = string("model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_13_cast_fp16 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("b_13_cast_fp16")]; + string var_6467_mode_0 = const()[name = string("op_6467_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_6467_cast_fp16 = gelu(mode = var_6467_mode_0, x = x_111_cast_fp16)[name = string("op_6467_cast_fp16")]; + tensor input_137_cast_fp16 = mul(x = var_6467_cast_fp16, y = b_13_cast_fp16)[name = string("input_137_cast_fp16")]; + string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; + tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; + tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; + int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523370432))))[name = string("model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_13_cast_fp16 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_137_cast_fp16)[name = string("e_13_cast_fp16")]; + tensor var_6475_axes_0 = const()[name = string("op_6475_axes_0"), val = tensor([2])]; + tensor var_6475_cast_fp16 = squeeze(axes = var_6475_axes_0, x = e_13_cast_fp16)[name = string("op_6475_cast_fp16")]; + tensor var_6476 = const()[name = string("op_6476"), val = tensor([0, 2, 1])]; + int32 var_6487 = const()[name = string("op_6487"), val = int32(-1)]; + fp16 const_297_promoted_to_fp16 = const()[name = string("const_297_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_113_cast_fp16 = transpose(perm = var_6476, x = var_6475_cast_fp16)[name = string("transpose_171")]; + tensor var_6489_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = const_297_promoted_to_fp16)[name = string("op_6489_cast_fp16")]; + bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; + tensor input_139_cast_fp16 = concat(axis = var_6487, interleave = input_139_interleave_0, values = (hidden_states_113_cast_fp16, var_6489_cast_fp16))[name = string("input_139_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_6484_to_fp16 = const()[name = string("op_6484_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_6484_to_fp16, x = input_139_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_167_cast_fp16 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167_cast_fp16")]; + tensor var_6503_to_fp16 = const()[name = string("op_6503_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523407360)))]; + tensor hidden_states_115_cast_fp16 = mul(x = normed_167_cast_fp16, y = var_6503_to_fp16)[name = string("hidden_states_115_cast_fp16")]; + tensor hidden_states_117_cast_fp16 = add(x = hidden_states_111_cast_fp16, y = hidden_states_115_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; + int32 var_6557 = const()[name = string("op_6557"), val = int32(-1)]; + fp16 const_302_promoted_to_fp16 = const()[name = string("const_302_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6559_cast_fp16 = mul(x = hidden_states_117_cast_fp16, y = const_302_promoted_to_fp16)[name = string("op_6559_cast_fp16")]; + bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; + tensor input_141_cast_fp16 = concat(axis = var_6557, interleave = input_141_interleave_0, values = (hidden_states_117_cast_fp16, var_6559_cast_fp16))[name = string("input_141_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_6554_to_fp16 = const()[name = string("op_6554_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_6554_to_fp16, x = input_141_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_171_cast_fp16 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171_cast_fp16")]; + tensor var_6573_to_fp16 = const()[name = string("op_6573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523409728)))]; + tensor hidden_states_119_cast_fp16 = mul(x = normed_171_cast_fp16, y = var_6573_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor var_6584 = const()[name = string("op_6584"), val = tensor([0, 2, 1])]; + tensor var_6587_axes_0 = const()[name = string("op_6587_axes_0"), val = tensor([2])]; + tensor var_6585_cast_fp16 = transpose(perm = var_6584, x = hidden_states_119_cast_fp16)[name = string("transpose_170")]; + tensor var_6587_cast_fp16 = expand_dims(axes = var_6587_axes_0, x = var_6585_cast_fp16)[name = string("op_6587_cast_fp16")]; + string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; + tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; + tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; + int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; + tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_6587_cast_fp16)[name = string("query_states_57")]; + string key_states_71_pad_type_0 = const()[name = string("key_states_71_pad_type_0"), val = string("valid")]; + tensor key_states_71_strides_0 = const()[name = string("key_states_71_strides_0"), val = tensor([1, 1])]; + tensor key_states_71_pad_0 = const()[name = string("key_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_71_dilations_0 = const()[name = string("key_states_71_dilations_0"), val = tensor([1, 1])]; + int32 key_states_71_groups_0 = const()[name = string("key_states_71_groups_0"), val = int32(1)]; + tensor key_states_71 = conv(dilations = key_states_71_dilations_0, groups = key_states_71_groups_0, pad = key_states_71_pad_0, pad_type = key_states_71_pad_type_0, strides = key_states_71_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_6587_cast_fp16)[name = string("key_states_71")]; + string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; + tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; + tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; + int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; + tensor value_states_57 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_6587_cast_fp16)[name = string("value_states_57")]; + tensor var_6629 = const()[name = string("op_6629"), val = tensor([1, 4, 256, 64])]; + tensor var_6630 = reshape(shape = var_6629, x = query_states_57)[name = string("op_6630")]; + tensor var_6635 = const()[name = string("op_6635"), val = tensor([0, 1, 3, 2])]; + tensor var_6640 = const()[name = string("op_6640"), val = tensor([1, 1, 256, 64])]; + tensor var_6641 = reshape(shape = var_6640, x = key_states_71)[name = string("op_6641")]; + tensor var_6646 = const()[name = string("op_6646"), val = tensor([0, 1, 3, 2])]; + tensor var_6651 = const()[name = string("op_6651"), val = tensor([1, 1, 256, 64])]; + tensor var_6652 = reshape(shape = var_6651, x = value_states_57)[name = string("op_6652")]; + tensor var_6657 = const()[name = string("op_6657"), val = tensor([0, 1, 3, 2])]; + int32 var_6668 = const()[name = string("op_6668"), val = int32(-1)]; + fp16 const_307_promoted = const()[name = string("const_307_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_121 = transpose(perm = var_6635, x = var_6630)[name = string("transpose_169")]; + tensor var_6670 = mul(x = hidden_states_121, y = const_307_promoted)[name = string("op_6670")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145 = concat(axis = var_6668, interleave = input_145_interleave_0, values = (hidden_states_121, var_6670))[name = string("input_145")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_6665_to_fp16 = const()[name = string("op_6665_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6665_to_fp16, x = input_145)[name = string("normed_173_cast_fp16")]; + tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_175 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175")]; + tensor var_6684_to_fp16 = const()[name = string("op_6684_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523412096)))]; + tensor q_15_cast_fp16 = mul(x = normed_175, y = var_6684_to_fp16)[name = string("q_15_cast_fp16")]; + int32 var_6695 = const()[name = string("op_6695"), val = int32(-1)]; + fp16 const_311_promoted = const()[name = string("const_311_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_123 = transpose(perm = var_6646, x = var_6641)[name = string("transpose_168")]; + tensor var_6697 = mul(x = hidden_states_123, y = const_311_promoted)[name = string("op_6697")]; + bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; + tensor input_147 = concat(axis = var_6695, interleave = input_147_interleave_0, values = (hidden_states_123, var_6697))[name = string("input_147")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_6692_to_fp16 = const()[name = string("op_6692_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_6692_to_fp16, x = input_147)[name = string("normed_177_cast_fp16")]; + tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_179 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179")]; + tensor var_6711_to_fp16 = const()[name = string("op_6711_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523412672)))]; + tensor k_15_cast_fp16 = mul(x = normed_179, y = var_6711_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_6725_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_5)[name = string("op_6725_cast_fp16")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; + fp16 const_317_promoted_to_fp16 = const()[name = string("const_317_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6746_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_317_promoted_to_fp16)[name = string("op_6746_cast_fp16")]; + int32 var_6748 = const()[name = string("op_6748"), val = int32(-1)]; + bool var_6749_interleave_0 = const()[name = string("op_6749_interleave_0"), val = bool(false)]; + tensor var_6749_cast_fp16 = concat(axis = var_6748, interleave = var_6749_interleave_0, values = (var_6746_cast_fp16, x1_29_cast_fp16))[name = string("op_6749_cast_fp16")]; + tensor var_6750_cast_fp16 = mul(x = var_6749_cast_fp16, y = sin_5)[name = string("op_6750_cast_fp16")]; + tensor query_states_59_cast_fp16 = add(x = var_6725_cast_fp16, y = var_6750_cast_fp16)[name = string("query_states_59_cast_fp16")]; + tensor var_6753_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_5)[name = string("op_6753_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; + fp16 const_320_promoted_to_fp16 = const()[name = string("const_320_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6774_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_320_promoted_to_fp16)[name = string("op_6774_cast_fp16")]; + int32 var_6776 = const()[name = string("op_6776"), val = int32(-1)]; + bool var_6777_interleave_0 = const()[name = string("op_6777_interleave_0"), val = bool(false)]; + tensor var_6777_cast_fp16 = concat(axis = var_6776, interleave = var_6777_interleave_0, values = (var_6774_cast_fp16, x1_31_cast_fp16))[name = string("op_6777_cast_fp16")]; + tensor var_6778_cast_fp16 = mul(x = var_6777_cast_fp16, y = sin_5)[name = string("op_6778_cast_fp16")]; + tensor key_states_73_cast_fp16 = add(x = var_6753_cast_fp16, y = var_6778_cast_fp16)[name = string("key_states_73_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([6])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([7])]; + int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; + bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; + tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, end_pos_1, concat_129_values3_0))[name = string("concat_129")]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_13_stride_0, update = key_states_73_cast_fp16, x = coreml_update_state_65)[name = string("model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_118_write_state")]; + tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_118")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([28])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([29])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; + tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; + tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; + int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; + bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; + tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, end_pos_1, concat_133_values3_0))[name = string("concat_133")]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_59 = transpose(perm = var_6657, x = var_6652)[name = string("transpose_167")]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_14_stride_0, update = value_states_59, x = coreml_update_state_66)[name = string("model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_119_write_state")]; + tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_119")]; + tensor var_6877_begin_0 = const()[name = string("op_6877_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_6877_end_0 = const()[name = string("op_6877_end_0"), val = tensor([7, 1, 512, 256])]; + tensor var_6877_end_mask_0 = const()[name = string("op_6877_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6877_cast_fp16 = slice_by_index(begin = var_6877_begin_0, end = var_6877_end_0, end_mask = var_6877_end_mask_0, x = coreml_update_state_67)[name = string("op_6877_cast_fp16")]; + tensor var_6884_begin_0 = const()[name = string("op_6884_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_6884_end_0 = const()[name = string("op_6884_end_0"), val = tensor([29, 1, 512, 256])]; + tensor var_6884_end_mask_0 = const()[name = string("op_6884_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6884_cast_fp16 = slice_by_index(begin = var_6884_begin_0, end = var_6884_end_0, end_mask = var_6884_end_mask_0, x = coreml_update_state_67)[name = string("op_6884_cast_fp16")]; + tensor var_6923 = const()[name = string("op_6923"), val = tensor([1, 4, 1, 1])]; + tensor x_117_cast_fp16 = tile(reps = var_6923, x = var_6877_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_6943 = const()[name = string("op_6943"), val = tensor([1, 4, 1, 1])]; + tensor x_123_cast_fp16 = tile(reps = var_6943, x = var_6884_cast_fp16)[name = string("x_123_cast_fp16")]; + bool var_6970_transpose_x_0 = const()[name = string("op_6970_transpose_x_0"), val = bool(false)]; + bool var_6970_transpose_y_0 = const()[name = string("op_6970_transpose_y_0"), val = bool(true)]; + tensor var_6970 = matmul(transpose_x = var_6970_transpose_x_0, transpose_y = var_6970_transpose_y_0, x = query_states_59_cast_fp16, y = x_117_cast_fp16)[name = string("op_6970")]; + fp16 var_6971_to_fp16 = const()[name = string("op_6971_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_29_cast_fp16 = mul(x = var_6970, y = var_6971_to_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor attn_weights_31_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = mask_slice_1)[name = string("attn_weights_31_cast_fp16")]; + int32 var_7006 = const()[name = string("op_7006"), val = int32(-1)]; + tensor var_7008_cast_fp16 = softmax(axis = var_7006, x = attn_weights_31_cast_fp16)[name = string("op_7008_cast_fp16")]; + tensor concat_138 = const()[name = string("concat_138"), val = tensor([4, 64, 512])]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_7008_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor concat_139 = const()[name = string("concat_139"), val = tensor([4, 512, 256])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_123_cast_fp16)[name = string("reshape_22_cast_fp16")]; + bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; + bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 4, 64, 256])]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor var_7020_perm_0 = const()[name = string("op_7020_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7039 = const()[name = string("op_7039"), val = tensor([1, 64, 1024])]; + tensor var_7020_cast_fp16 = transpose(perm = var_7020_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_166")]; + tensor attn_output_75_cast_fp16 = reshape(shape = var_7039, x = var_7020_cast_fp16)[name = string("attn_output_75_cast_fp16")]; + tensor var_7044 = const()[name = string("op_7044"), val = tensor([0, 2, 1])]; + string var_7060_pad_type_0 = const()[name = string("op_7060_pad_type_0"), val = string("valid")]; + int32 var_7060_groups_0 = const()[name = string("op_7060_groups_0"), val = int32(1)]; + tensor var_7060_strides_0 = const()[name = string("op_7060_strides_0"), val = tensor([1])]; + tensor var_7060_pad_0 = const()[name = string("op_7060_pad_0"), val = tensor([0, 0])]; + tensor var_7060_dilations_0 = const()[name = string("op_7060_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523413248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524298048))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7045_cast_fp16 = transpose(perm = var_7044, x = attn_output_75_cast_fp16)[name = string("transpose_165")]; + tensor var_7060_cast_fp16 = conv(dilations = var_7060_dilations_0, groups = var_7060_groups_0, pad = var_7060_pad_0, pad_type = var_7060_pad_type_0, strides = var_7060_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_7045_cast_fp16)[name = string("op_7060_cast_fp16")]; + tensor var_7064 = const()[name = string("op_7064"), val = tensor([0, 2, 1])]; + int32 var_7075 = const()[name = string("op_7075"), val = int32(-1)]; + fp16 const_332_promoted_to_fp16 = const()[name = string("const_332_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_125_cast_fp16 = transpose(perm = var_7064, x = var_7060_cast_fp16)[name = string("transpose_164")]; + tensor var_7077_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = const_332_promoted_to_fp16)[name = string("op_7077_cast_fp16")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151_cast_fp16 = concat(axis = var_7075, interleave = input_151_interleave_0, values = (hidden_states_125_cast_fp16, var_7077_cast_fp16))[name = string("input_151_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_7072_to_fp16 = const()[name = string("op_7072_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_7072_to_fp16, x = input_151_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_183_cast_fp16 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183_cast_fp16")]; + tensor var_7091_to_fp16 = const()[name = string("op_7091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524334976)))]; + tensor attn_output_79_cast_fp16 = mul(x = normed_183_cast_fp16, y = var_7091_to_fp16)[name = string("attn_output_79_cast_fp16")]; + tensor hidden_states_127_cast_fp16 = add(x = hidden_states_117_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + int32 var_7104 = const()[name = string("op_7104"), val = int32(-1)]; + fp16 const_336_promoted_to_fp16 = const()[name = string("const_336_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7106_cast_fp16 = mul(x = hidden_states_127_cast_fp16, y = const_336_promoted_to_fp16)[name = string("op_7106_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_7104, interleave = input_153_interleave_0, values = (hidden_states_127_cast_fp16, var_7106_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_7101_to_fp16 = const()[name = string("op_7101_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_7101_to_fp16, x = input_153_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_187_cast_fp16 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187_cast_fp16")]; + tensor var_7120_to_fp16 = const()[name = string("op_7120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524337344)))]; + tensor x_125_cast_fp16 = mul(x = normed_187_cast_fp16, y = var_7120_to_fp16)[name = string("x_125_cast_fp16")]; + tensor var_7132 = const()[name = string("op_7132"), val = tensor([0, 2, 1])]; + tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; + tensor var_7133_cast_fp16 = transpose(perm = var_7132, x = x_125_cast_fp16)[name = string("transpose_163")]; + tensor input_155_cast_fp16 = expand_dims(axes = input_155_axes_0, x = var_7133_cast_fp16)[name = string("input_155_cast_fp16")]; + string x_127_pad_type_0 = const()[name = string("x_127_pad_type_0"), val = string("valid")]; + tensor x_127_strides_0 = const()[name = string("x_127_strides_0"), val = tensor([1, 1])]; + tensor x_127_pad_0 = const()[name = string("x_127_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_127_dilations_0 = const()[name = string("x_127_dilations_0"), val = tensor([1, 1])]; + int32 x_127_groups_0 = const()[name = string("x_127_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524339712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530311744))))[name = string("model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_127_cast_fp16 = conv(dilations = x_127_dilations_0, groups = x_127_groups_0, pad = x_127_pad_0, pad_type = x_127_pad_type_0, strides = x_127_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("x_127_cast_fp16")]; + string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; + tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; + tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; + int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530532992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536505024))))[name = string("model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_15_cast_fp16 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("b_15_cast_fp16")]; + string var_7158_mode_0 = const()[name = string("op_7158_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_7158_cast_fp16 = gelu(mode = var_7158_mode_0, x = x_127_cast_fp16)[name = string("op_7158_cast_fp16")]; + tensor input_157_cast_fp16 = mul(x = var_7158_cast_fp16, y = b_15_cast_fp16)[name = string("input_157_cast_fp16")]; + string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; + tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; + tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; + int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536726272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542698304))))[name = string("model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_15_cast_fp16 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_157_cast_fp16)[name = string("e_15_cast_fp16")]; + tensor var_7166_axes_0 = const()[name = string("op_7166_axes_0"), val = tensor([2])]; + tensor var_7166_cast_fp16 = squeeze(axes = var_7166_axes_0, x = e_15_cast_fp16)[name = string("op_7166_cast_fp16")]; + tensor var_7167 = const()[name = string("op_7167"), val = tensor([0, 2, 1])]; + int32 var_7178 = const()[name = string("op_7178"), val = int32(-1)]; + fp16 const_340_promoted_to_fp16 = const()[name = string("const_340_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_129_cast_fp16 = transpose(perm = var_7167, x = var_7166_cast_fp16)[name = string("transpose_162")]; + tensor var_7180_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_340_promoted_to_fp16)[name = string("op_7180_cast_fp16")]; + bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; + tensor input_159_cast_fp16 = concat(axis = var_7178, interleave = input_159_interleave_0, values = (hidden_states_129_cast_fp16, var_7180_cast_fp16))[name = string("input_159_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_7175_to_fp16 = const()[name = string("op_7175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_7175_to_fp16, x = input_159_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; + tensor var_7194_to_fp16 = const()[name = string("op_7194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542735232)))]; + tensor hidden_states_131_cast_fp16 = mul(x = normed_191_cast_fp16, y = var_7194_to_fp16)[name = string("hidden_states_131_cast_fp16")]; + tensor hidden_states_133_cast_fp16 = add(x = hidden_states_127_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; + int32 var_7248 = const()[name = string("op_7248"), val = int32(-1)]; + fp16 const_345_promoted_to_fp16 = const()[name = string("const_345_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7250_cast_fp16 = mul(x = hidden_states_133_cast_fp16, y = const_345_promoted_to_fp16)[name = string("op_7250_cast_fp16")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161_cast_fp16 = concat(axis = var_7248, interleave = input_161_interleave_0, values = (hidden_states_133_cast_fp16, var_7250_cast_fp16))[name = string("input_161_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_7245_to_fp16 = const()[name = string("op_7245_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_7245_to_fp16, x = input_161_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; + tensor var_7264_to_fp16 = const()[name = string("op_7264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542737600)))]; + tensor hidden_states_135_cast_fp16 = mul(x = normed_195_cast_fp16, y = var_7264_to_fp16)[name = string("hidden_states_135_cast_fp16")]; + tensor var_7275 = const()[name = string("op_7275"), val = tensor([0, 2, 1])]; + tensor var_7278_axes_0 = const()[name = string("op_7278_axes_0"), val = tensor([2])]; + tensor var_7276_cast_fp16 = transpose(perm = var_7275, x = hidden_states_135_cast_fp16)[name = string("transpose_161")]; + tensor var_7278_cast_fp16 = expand_dims(axes = var_7278_axes_0, x = var_7276_cast_fp16)[name = string("op_7278_cast_fp16")]; + string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; + tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; + tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; + int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; + tensor query_states_65 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_7278_cast_fp16)[name = string("query_states_65")]; + string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; + tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; + tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; + int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; + tensor key_states_81 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_7278_cast_fp16)[name = string("key_states_81")]; + string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; + tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; + tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; + int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; + tensor value_states_65 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_7278_cast_fp16)[name = string("value_states_65")]; + tensor var_7320 = const()[name = string("op_7320"), val = tensor([1, 4, 256, 64])]; + tensor var_7321 = reshape(shape = var_7320, x = query_states_65)[name = string("op_7321")]; + tensor var_7326 = const()[name = string("op_7326"), val = tensor([0, 1, 3, 2])]; + tensor var_7331 = const()[name = string("op_7331"), val = tensor([1, 1, 256, 64])]; + tensor var_7332 = reshape(shape = var_7331, x = key_states_81)[name = string("op_7332")]; + tensor var_7337 = const()[name = string("op_7337"), val = tensor([0, 1, 3, 2])]; + tensor var_7342 = const()[name = string("op_7342"), val = tensor([1, 1, 256, 64])]; + tensor var_7343 = reshape(shape = var_7342, x = value_states_65)[name = string("op_7343")]; + tensor var_7348 = const()[name = string("op_7348"), val = tensor([0, 1, 3, 2])]; + int32 var_7359 = const()[name = string("op_7359"), val = int32(-1)]; + fp16 const_350_promoted = const()[name = string("const_350_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_137 = transpose(perm = var_7326, x = var_7321)[name = string("transpose_160")]; + tensor var_7361 = mul(x = hidden_states_137, y = const_350_promoted)[name = string("op_7361")]; + bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; + tensor input_165 = concat(axis = var_7359, interleave = input_165_interleave_0, values = (hidden_states_137, var_7361))[name = string("input_165")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_7356_to_fp16 = const()[name = string("op_7356_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_7356_to_fp16, x = input_165)[name = string("normed_197_cast_fp16")]; + tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; + tensor var_7375_to_fp16 = const()[name = string("op_7375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542739968)))]; + tensor q_17_cast_fp16 = mul(x = normed_199, y = var_7375_to_fp16)[name = string("q_17_cast_fp16")]; + int32 var_7386 = const()[name = string("op_7386"), val = int32(-1)]; + fp16 const_354_promoted = const()[name = string("const_354_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_139 = transpose(perm = var_7337, x = var_7332)[name = string("transpose_159")]; + tensor var_7388 = mul(x = hidden_states_139, y = const_354_promoted)[name = string("op_7388")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167 = concat(axis = var_7386, interleave = input_167_interleave_0, values = (hidden_states_139, var_7388))[name = string("input_167")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_7383_to_fp16 = const()[name = string("op_7383_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_7383_to_fp16, x = input_167)[name = string("normed_201_cast_fp16")]; + tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; + tensor var_7402_to_fp16 = const()[name = string("op_7402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542740544)))]; + tensor k_17_cast_fp16 = mul(x = normed_203, y = var_7402_to_fp16)[name = string("k_17_cast_fp16")]; + tensor var_7416_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_5)[name = string("op_7416_cast_fp16")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; + fp16 const_360_promoted_to_fp16 = const()[name = string("const_360_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7437_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_360_promoted_to_fp16)[name = string("op_7437_cast_fp16")]; + int32 var_7439 = const()[name = string("op_7439"), val = int32(-1)]; + bool var_7440_interleave_0 = const()[name = string("op_7440_interleave_0"), val = bool(false)]; + tensor var_7440_cast_fp16 = concat(axis = var_7439, interleave = var_7440_interleave_0, values = (var_7437_cast_fp16, x1_33_cast_fp16))[name = string("op_7440_cast_fp16")]; + tensor var_7441_cast_fp16 = mul(x = var_7440_cast_fp16, y = sin_5)[name = string("op_7441_cast_fp16")]; + tensor query_states_67_cast_fp16 = add(x = var_7416_cast_fp16, y = var_7441_cast_fp16)[name = string("query_states_67_cast_fp16")]; + tensor var_7444_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_5)[name = string("op_7444_cast_fp16")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; + fp16 const_363_promoted_to_fp16 = const()[name = string("const_363_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7465_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_363_promoted_to_fp16)[name = string("op_7465_cast_fp16")]; + int32 var_7467 = const()[name = string("op_7467"), val = int32(-1)]; + bool var_7468_interleave_0 = const()[name = string("op_7468_interleave_0"), val = bool(false)]; + tensor var_7468_cast_fp16 = concat(axis = var_7467, interleave = var_7468_interleave_0, values = (var_7465_cast_fp16, x1_35_cast_fp16))[name = string("op_7468_cast_fp16")]; + tensor var_7469_cast_fp16 = mul(x = var_7468_cast_fp16, y = sin_5)[name = string("op_7469_cast_fp16")]; + tensor key_states_83_cast_fp16 = add(x = var_7444_cast_fp16, y = var_7469_cast_fp16)[name = string("key_states_83_cast_fp16")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([7])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([8])]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, end_pos_1, concat_147_values3_0))[name = string("concat_147")]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_15_stride_0, update = key_states_83_cast_fp16, x = coreml_update_state_67)[name = string("model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_120_write_state")]; + tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_120")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([29])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([30])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; + tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; + tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, end_pos_1, concat_151_values3_0))[name = string("concat_151")]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_67 = transpose(perm = var_7348, x = var_7343)[name = string("transpose_158")]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_16_stride_0, update = value_states_67, x = coreml_update_state_68)[name = string("model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_121_write_state")]; + tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_121")]; + tensor var_7568_begin_0 = const()[name = string("op_7568_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_7568_end_0 = const()[name = string("op_7568_end_0"), val = tensor([8, 1, 512, 256])]; + tensor var_7568_end_mask_0 = const()[name = string("op_7568_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7568_cast_fp16 = slice_by_index(begin = var_7568_begin_0, end = var_7568_end_0, end_mask = var_7568_end_mask_0, x = coreml_update_state_69)[name = string("op_7568_cast_fp16")]; + tensor var_7575_begin_0 = const()[name = string("op_7575_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_7575_end_0 = const()[name = string("op_7575_end_0"), val = tensor([30, 1, 512, 256])]; + tensor var_7575_end_mask_0 = const()[name = string("op_7575_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7575_cast_fp16 = slice_by_index(begin = var_7575_begin_0, end = var_7575_end_0, end_mask = var_7575_end_mask_0, x = coreml_update_state_69)[name = string("op_7575_cast_fp16")]; + tensor var_7614 = const()[name = string("op_7614"), val = tensor([1, 4, 1, 1])]; + tensor x_133_cast_fp16 = tile(reps = var_7614, x = var_7568_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_7634 = const()[name = string("op_7634"), val = tensor([1, 4, 1, 1])]; + tensor x_139_cast_fp16 = tile(reps = var_7634, x = var_7575_cast_fp16)[name = string("x_139_cast_fp16")]; + bool var_7661_transpose_x_0 = const()[name = string("op_7661_transpose_x_0"), val = bool(false)]; + bool var_7661_transpose_y_0 = const()[name = string("op_7661_transpose_y_0"), val = bool(true)]; + tensor var_7661 = matmul(transpose_x = var_7661_transpose_x_0, transpose_y = var_7661_transpose_y_0, x = query_states_67_cast_fp16, y = x_133_cast_fp16)[name = string("op_7661")]; + fp16 var_7662_to_fp16 = const()[name = string("op_7662_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_33_cast_fp16 = mul(x = var_7661, y = var_7662_to_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor attn_weights_35_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = mask_slice_1)[name = string("attn_weights_35_cast_fp16")]; + int32 var_7697 = const()[name = string("op_7697"), val = int32(-1)]; + tensor var_7699_cast_fp16 = softmax(axis = var_7697, x = attn_weights_35_cast_fp16)[name = string("op_7699_cast_fp16")]; + tensor concat_156 = const()[name = string("concat_156"), val = tensor([4, 64, 512])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_7699_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor concat_157 = const()[name = string("concat_157"), val = tensor([4, 512, 256])]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_139_cast_fp16)[name = string("reshape_25_cast_fp16")]; + bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; + bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; + tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; + tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 4, 64, 256])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor var_7711_perm_0 = const()[name = string("op_7711_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7730 = const()[name = string("op_7730"), val = tensor([1, 64, 1024])]; + tensor var_7711_cast_fp16 = transpose(perm = var_7711_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_157")]; + tensor attn_output_85_cast_fp16 = reshape(shape = var_7730, x = var_7711_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_7735 = const()[name = string("op_7735"), val = tensor([0, 2, 1])]; + string var_7751_pad_type_0 = const()[name = string("op_7751_pad_type_0"), val = string("valid")]; + int32 var_7751_groups_0 = const()[name = string("op_7751_groups_0"), val = int32(1)]; + tensor var_7751_strides_0 = const()[name = string("op_7751_strides_0"), val = tensor([1])]; + tensor var_7751_pad_0 = const()[name = string("op_7751_pad_0"), val = tensor([0, 0])]; + tensor var_7751_dilations_0 = const()[name = string("op_7751_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542741120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543625920))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7736_cast_fp16 = transpose(perm = var_7735, x = attn_output_85_cast_fp16)[name = string("transpose_156")]; + tensor var_7751_cast_fp16 = conv(dilations = var_7751_dilations_0, groups = var_7751_groups_0, pad = var_7751_pad_0, pad_type = var_7751_pad_type_0, strides = var_7751_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_7736_cast_fp16)[name = string("op_7751_cast_fp16")]; + tensor var_7755 = const()[name = string("op_7755"), val = tensor([0, 2, 1])]; + int32 var_7766 = const()[name = string("op_7766"), val = int32(-1)]; + fp16 const_375_promoted_to_fp16 = const()[name = string("const_375_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_141_cast_fp16 = transpose(perm = var_7755, x = var_7751_cast_fp16)[name = string("transpose_155")]; + tensor var_7768_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_375_promoted_to_fp16)[name = string("op_7768_cast_fp16")]; + bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; + tensor input_171_cast_fp16 = concat(axis = var_7766, interleave = input_171_interleave_0, values = (hidden_states_141_cast_fp16, var_7768_cast_fp16))[name = string("input_171_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_7763_to_fp16 = const()[name = string("op_7763_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_7763_to_fp16, x = input_171_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; + tensor var_7782_to_fp16 = const()[name = string("op_7782_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543662848)))]; + tensor attn_output_89_cast_fp16 = mul(x = normed_207_cast_fp16, y = var_7782_to_fp16)[name = string("attn_output_89_cast_fp16")]; + tensor hidden_states_143_cast_fp16 = add(x = hidden_states_133_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; + int32 var_7795 = const()[name = string("op_7795"), val = int32(-1)]; + fp16 const_379_promoted_to_fp16 = const()[name = string("const_379_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7797_cast_fp16 = mul(x = hidden_states_143_cast_fp16, y = const_379_promoted_to_fp16)[name = string("op_7797_cast_fp16")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173_cast_fp16 = concat(axis = var_7795, interleave = input_173_interleave_0, values = (hidden_states_143_cast_fp16, var_7797_cast_fp16))[name = string("input_173_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_7792_to_fp16 = const()[name = string("op_7792_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_7792_to_fp16, x = input_173_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; + tensor var_7811_to_fp16 = const()[name = string("op_7811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543665216)))]; + tensor x_141_cast_fp16 = mul(x = normed_211_cast_fp16, y = var_7811_to_fp16)[name = string("x_141_cast_fp16")]; + tensor var_7823 = const()[name = string("op_7823"), val = tensor([0, 2, 1])]; + tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; + tensor var_7824_cast_fp16 = transpose(perm = var_7823, x = x_141_cast_fp16)[name = string("transpose_154")]; + tensor input_175_cast_fp16 = expand_dims(axes = input_175_axes_0, x = var_7824_cast_fp16)[name = string("input_175_cast_fp16")]; + string x_143_pad_type_0 = const()[name = string("x_143_pad_type_0"), val = string("valid")]; + tensor x_143_strides_0 = const()[name = string("x_143_strides_0"), val = tensor([1, 1])]; + tensor x_143_pad_0 = const()[name = string("x_143_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_143_dilations_0 = const()[name = string("x_143_dilations_0"), val = tensor([1, 1])]; + int32 x_143_groups_0 = const()[name = string("x_143_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543667584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549639616))))[name = string("model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("x_143_cast_fp16")]; + string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; + tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; + tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; + int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549860864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555832896))))[name = string("model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_17_cast_fp16 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("b_17_cast_fp16")]; + string var_7849_mode_0 = const()[name = string("op_7849_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_7849_cast_fp16 = gelu(mode = var_7849_mode_0, x = x_143_cast_fp16)[name = string("op_7849_cast_fp16")]; + tensor input_177_cast_fp16 = mul(x = var_7849_cast_fp16, y = b_17_cast_fp16)[name = string("input_177_cast_fp16")]; + string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; + tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; + tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; + int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556054144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562026176))))[name = string("model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_17_cast_fp16 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("e_17_cast_fp16")]; + tensor var_7857_axes_0 = const()[name = string("op_7857_axes_0"), val = tensor([2])]; + tensor var_7857_cast_fp16 = squeeze(axes = var_7857_axes_0, x = e_17_cast_fp16)[name = string("op_7857_cast_fp16")]; + tensor var_7858 = const()[name = string("op_7858"), val = tensor([0, 2, 1])]; + int32 var_7869 = const()[name = string("op_7869"), val = int32(-1)]; + fp16 const_383_promoted_to_fp16 = const()[name = string("const_383_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_145_cast_fp16 = transpose(perm = var_7858, x = var_7857_cast_fp16)[name = string("transpose_153")]; + tensor var_7871_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = const_383_promoted_to_fp16)[name = string("op_7871_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_7869, interleave = input_179_interleave_0, values = (hidden_states_145_cast_fp16, var_7871_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_7866_to_fp16 = const()[name = string("op_7866_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_7866_to_fp16, x = input_179_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_215_cast_fp16 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215_cast_fp16")]; + tensor var_7885_to_fp16 = const()[name = string("op_7885_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562063104)))]; + tensor hidden_states_147_cast_fp16 = mul(x = normed_215_cast_fp16, y = var_7885_to_fp16)[name = string("hidden_states_147_cast_fp16")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_143_cast_fp16, y = hidden_states_147_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + int32 var_7939 = const()[name = string("op_7939"), val = int32(-1)]; + fp16 const_388_promoted_to_fp16 = const()[name = string("const_388_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7941_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = const_388_promoted_to_fp16)[name = string("op_7941_cast_fp16")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181_cast_fp16 = concat(axis = var_7939, interleave = input_181_interleave_0, values = (hidden_states_149_cast_fp16, var_7941_cast_fp16))[name = string("input_181_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_7936_to_fp16 = const()[name = string("op_7936_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_7936_to_fp16, x = input_181_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_219_cast_fp16 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219_cast_fp16")]; + tensor var_7955_to_fp16 = const()[name = string("op_7955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562065472)))]; + tensor hidden_states_151_cast_fp16 = mul(x = normed_219_cast_fp16, y = var_7955_to_fp16)[name = string("hidden_states_151_cast_fp16")]; + tensor var_7966 = const()[name = string("op_7966"), val = tensor([0, 2, 1])]; + tensor var_7969_axes_0 = const()[name = string("op_7969_axes_0"), val = tensor([2])]; + tensor var_7967_cast_fp16 = transpose(perm = var_7966, x = hidden_states_151_cast_fp16)[name = string("transpose_152")]; + tensor var_7969_cast_fp16 = expand_dims(axes = var_7969_axes_0, x = var_7967_cast_fp16)[name = string("op_7969_cast_fp16")]; + string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; + tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; + tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; + int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; + tensor query_states_73 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_7969_cast_fp16)[name = string("query_states_73")]; + string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; + tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; + tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; + int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; + tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_7969_cast_fp16)[name = string("key_states_91")]; + string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; + tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; + tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; + int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; + tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_7969_cast_fp16)[name = string("value_states_73")]; + tensor var_8011 = const()[name = string("op_8011"), val = tensor([1, 4, 256, 64])]; + tensor var_8012 = reshape(shape = var_8011, x = query_states_73)[name = string("op_8012")]; + tensor var_8017 = const()[name = string("op_8017"), val = tensor([0, 1, 3, 2])]; + tensor var_8022 = const()[name = string("op_8022"), val = tensor([1, 1, 256, 64])]; + tensor var_8023 = reshape(shape = var_8022, x = key_states_91)[name = string("op_8023")]; + tensor var_8028 = const()[name = string("op_8028"), val = tensor([0, 1, 3, 2])]; + tensor var_8033 = const()[name = string("op_8033"), val = tensor([1, 1, 256, 64])]; + tensor var_8034 = reshape(shape = var_8033, x = value_states_73)[name = string("op_8034")]; + tensor var_8039 = const()[name = string("op_8039"), val = tensor([0, 1, 3, 2])]; + int32 var_8050 = const()[name = string("op_8050"), val = int32(-1)]; + fp16 const_393_promoted = const()[name = string("const_393_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_153 = transpose(perm = var_8017, x = var_8012)[name = string("transpose_151")]; + tensor var_8052 = mul(x = hidden_states_153, y = const_393_promoted)[name = string("op_8052")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_8050, interleave = input_185_interleave_0, values = (hidden_states_153, var_8052))[name = string("input_185")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_8047_to_fp16 = const()[name = string("op_8047_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_8047_to_fp16, x = input_185)[name = string("normed_221_cast_fp16")]; + tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_223 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223")]; + tensor var_8066_to_fp16 = const()[name = string("op_8066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562067840)))]; + tensor q_19_cast_fp16 = mul(x = normed_223, y = var_8066_to_fp16)[name = string("q_19_cast_fp16")]; + int32 var_8077 = const()[name = string("op_8077"), val = int32(-1)]; + fp16 const_397_promoted = const()[name = string("const_397_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_155 = transpose(perm = var_8028, x = var_8023)[name = string("transpose_150")]; + tensor var_8079 = mul(x = hidden_states_155, y = const_397_promoted)[name = string("op_8079")]; + bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; + tensor input_187 = concat(axis = var_8077, interleave = input_187_interleave_0, values = (hidden_states_155, var_8079))[name = string("input_187")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_8074_to_fp16 = const()[name = string("op_8074_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_8074_to_fp16, x = input_187)[name = string("normed_225_cast_fp16")]; + tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_227 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227")]; + tensor var_8093_to_fp16 = const()[name = string("op_8093_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562068416)))]; + tensor k_19_cast_fp16 = mul(x = normed_227, y = var_8093_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_8107_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_5)[name = string("op_8107_cast_fp16")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; + fp16 const_403_promoted_to_fp16 = const()[name = string("const_403_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8128_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_403_promoted_to_fp16)[name = string("op_8128_cast_fp16")]; + int32 var_8130 = const()[name = string("op_8130"), val = int32(-1)]; + bool var_8131_interleave_0 = const()[name = string("op_8131_interleave_0"), val = bool(false)]; + tensor var_8131_cast_fp16 = concat(axis = var_8130, interleave = var_8131_interleave_0, values = (var_8128_cast_fp16, x1_37_cast_fp16))[name = string("op_8131_cast_fp16")]; + tensor var_8132_cast_fp16 = mul(x = var_8131_cast_fp16, y = sin_5)[name = string("op_8132_cast_fp16")]; + tensor query_states_75_cast_fp16 = add(x = var_8107_cast_fp16, y = var_8132_cast_fp16)[name = string("query_states_75_cast_fp16")]; + tensor var_8135_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_5)[name = string("op_8135_cast_fp16")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; + fp16 const_406_promoted_to_fp16 = const()[name = string("const_406_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8156_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_406_promoted_to_fp16)[name = string("op_8156_cast_fp16")]; + int32 var_8158 = const()[name = string("op_8158"), val = int32(-1)]; + bool var_8159_interleave_0 = const()[name = string("op_8159_interleave_0"), val = bool(false)]; + tensor var_8159_cast_fp16 = concat(axis = var_8158, interleave = var_8159_interleave_0, values = (var_8156_cast_fp16, x1_39_cast_fp16))[name = string("op_8159_cast_fp16")]; + tensor var_8160_cast_fp16 = mul(x = var_8159_cast_fp16, y = sin_5)[name = string("op_8160_cast_fp16")]; + tensor key_states_93_cast_fp16 = add(x = var_8135_cast_fp16, y = var_8160_cast_fp16)[name = string("key_states_93_cast_fp16")]; + tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([8])]; + tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; + tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([9])]; + int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; + bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; + tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_164")]; + tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; + tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; + int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; + bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; + tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (expand_dims_112, concat_165_values1_0, end_pos_1, concat_165_values3_0))[name = string("concat_165")]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_164, begin_mask = model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0, end = concat_165, end_mask = model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_17_stride_0, update = key_states_93_cast_fp16, x = coreml_update_state_69)[name = string("model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_122_write_state")]; + tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_122")]; + tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([30])]; + tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; + tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; + tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([31])]; + int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; + bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; + tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_168")]; + tensor concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = tensor([0])]; + tensor concat_169_values3_0 = const()[name = string("concat_169_values3_0"), val = tensor([0])]; + int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; + bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; + tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (expand_dims_118, concat_169_values1_0, end_pos_1, concat_169_values3_0))[name = string("concat_169")]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_75 = transpose(perm = var_8039, x = var_8034)[name = string("transpose_149")]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_168, begin_mask = model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0, end = concat_169, end_mask = model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_18_stride_0, update = value_states_75, x = coreml_update_state_70)[name = string("model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_123_write_state")]; + tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_123")]; + tensor var_8259_begin_0 = const()[name = string("op_8259_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_8259_end_0 = const()[name = string("op_8259_end_0"), val = tensor([9, 1, 512, 256])]; + tensor var_8259_end_mask_0 = const()[name = string("op_8259_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8259_cast_fp16 = slice_by_index(begin = var_8259_begin_0, end = var_8259_end_0, end_mask = var_8259_end_mask_0, x = coreml_update_state_71)[name = string("op_8259_cast_fp16")]; + tensor var_8266_begin_0 = const()[name = string("op_8266_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_8266_end_0 = const()[name = string("op_8266_end_0"), val = tensor([31, 1, 512, 256])]; + tensor var_8266_end_mask_0 = const()[name = string("op_8266_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8266_cast_fp16 = slice_by_index(begin = var_8266_begin_0, end = var_8266_end_0, end_mask = var_8266_end_mask_0, x = coreml_update_state_71)[name = string("op_8266_cast_fp16")]; + tensor var_8305 = const()[name = string("op_8305"), val = tensor([1, 4, 1, 1])]; + tensor x_149_cast_fp16 = tile(reps = var_8305, x = var_8259_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_8325 = const()[name = string("op_8325"), val = tensor([1, 4, 1, 1])]; + tensor x_155_cast_fp16 = tile(reps = var_8325, x = var_8266_cast_fp16)[name = string("x_155_cast_fp16")]; + bool var_8352_transpose_x_0 = const()[name = string("op_8352_transpose_x_0"), val = bool(false)]; + bool var_8352_transpose_y_0 = const()[name = string("op_8352_transpose_y_0"), val = bool(true)]; + tensor var_8352 = matmul(transpose_x = var_8352_transpose_x_0, transpose_y = var_8352_transpose_y_0, x = query_states_75_cast_fp16, y = x_149_cast_fp16)[name = string("op_8352")]; + fp16 var_8353_to_fp16 = const()[name = string("op_8353_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_37_cast_fp16 = mul(x = var_8352, y = var_8353_to_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = mask_slice_1)[name = string("attn_weights_39_cast_fp16")]; + int32 var_8388 = const()[name = string("op_8388"), val = int32(-1)]; + tensor var_8390_cast_fp16 = softmax(axis = var_8388, x = attn_weights_39_cast_fp16)[name = string("op_8390_cast_fp16")]; + tensor concat_174 = const()[name = string("concat_174"), val = tensor([4, 64, 512])]; + tensor reshape_27_cast_fp16 = reshape(shape = concat_174, x = var_8390_cast_fp16)[name = string("reshape_27_cast_fp16")]; + tensor concat_175 = const()[name = string("concat_175"), val = tensor([4, 512, 256])]; + tensor reshape_28_cast_fp16 = reshape(shape = concat_175, x = x_155_cast_fp16)[name = string("reshape_28_cast_fp16")]; + bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; + bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; + tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; + tensor concat_179 = const()[name = string("concat_179"), val = tensor([1, 4, 64, 256])]; + tensor reshape_29_cast_fp16 = reshape(shape = concat_179, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; + tensor var_8402_perm_0 = const()[name = string("op_8402_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8421 = const()[name = string("op_8421"), val = tensor([1, 64, 1024])]; + tensor var_8402_cast_fp16 = transpose(perm = var_8402_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_148")]; + tensor attn_output_95_cast_fp16 = reshape(shape = var_8421, x = var_8402_cast_fp16)[name = string("attn_output_95_cast_fp16")]; + tensor var_8426 = const()[name = string("op_8426"), val = tensor([0, 2, 1])]; + string var_8442_pad_type_0 = const()[name = string("op_8442_pad_type_0"), val = string("valid")]; + int32 var_8442_groups_0 = const()[name = string("op_8442_groups_0"), val = int32(1)]; + tensor var_8442_strides_0 = const()[name = string("op_8442_strides_0"), val = tensor([1])]; + tensor var_8442_pad_0 = const()[name = string("op_8442_pad_0"), val = tensor([0, 0])]; + tensor var_8442_dilations_0 = const()[name = string("op_8442_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562068992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562953792))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8427_cast_fp16 = transpose(perm = var_8426, x = attn_output_95_cast_fp16)[name = string("transpose_147")]; + tensor var_8442_cast_fp16 = conv(dilations = var_8442_dilations_0, groups = var_8442_groups_0, pad = var_8442_pad_0, pad_type = var_8442_pad_type_0, strides = var_8442_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_8427_cast_fp16)[name = string("op_8442_cast_fp16")]; + tensor var_8446 = const()[name = string("op_8446"), val = tensor([0, 2, 1])]; + int32 var_8457 = const()[name = string("op_8457"), val = int32(-1)]; + fp16 const_418_promoted_to_fp16 = const()[name = string("const_418_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_157_cast_fp16 = transpose(perm = var_8446, x = var_8442_cast_fp16)[name = string("transpose_146")]; + tensor var_8459_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = const_418_promoted_to_fp16)[name = string("op_8459_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_8457, interleave = input_191_interleave_0, values = (hidden_states_157_cast_fp16, var_8459_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_8454_to_fp16 = const()[name = string("op_8454_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_8454_to_fp16, x = input_191_cast_fp16)[name = string("normed_229_cast_fp16")]; + tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_231_cast_fp16 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231_cast_fp16")]; + tensor var_8473_to_fp16 = const()[name = string("op_8473_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562990720)))]; + tensor attn_output_99_cast_fp16 = mul(x = normed_231_cast_fp16, y = var_8473_to_fp16)[name = string("attn_output_99_cast_fp16")]; + tensor hidden_states_159_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; + int32 var_8486 = const()[name = string("op_8486"), val = int32(-1)]; + fp16 const_422_promoted_to_fp16 = const()[name = string("const_422_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8488_cast_fp16 = mul(x = hidden_states_159_cast_fp16, y = const_422_promoted_to_fp16)[name = string("op_8488_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_8486, interleave = input_193_interleave_0, values = (hidden_states_159_cast_fp16, var_8488_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_8483_to_fp16 = const()[name = string("op_8483_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_8483_to_fp16, x = input_193_cast_fp16)[name = string("normed_233_cast_fp16")]; + tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_235_cast_fp16 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235_cast_fp16")]; + tensor var_8502_to_fp16 = const()[name = string("op_8502_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562993088)))]; + tensor x_157_cast_fp16 = mul(x = normed_235_cast_fp16, y = var_8502_to_fp16)[name = string("x_157_cast_fp16")]; + tensor var_8514 = const()[name = string("op_8514"), val = tensor([0, 2, 1])]; + tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; + tensor var_8515_cast_fp16 = transpose(perm = var_8514, x = x_157_cast_fp16)[name = string("transpose_145")]; + tensor input_195_cast_fp16 = expand_dims(axes = input_195_axes_0, x = var_8515_cast_fp16)[name = string("input_195_cast_fp16")]; + string x_159_pad_type_0 = const()[name = string("x_159_pad_type_0"), val = string("valid")]; + tensor x_159_strides_0 = const()[name = string("x_159_strides_0"), val = tensor([1, 1])]; + tensor x_159_pad_0 = const()[name = string("x_159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_159_dilations_0 = const()[name = string("x_159_dilations_0"), val = tensor([1, 1])]; + int32 x_159_groups_0 = const()[name = string("x_159_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562995456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568967488))))[name = string("model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_159_cast_fp16 = conv(dilations = x_159_dilations_0, groups = x_159_groups_0, pad = x_159_pad_0, pad_type = x_159_pad_type_0, strides = x_159_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("x_159_cast_fp16")]; + string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; + tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; + tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; + int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569188736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575160768))))[name = string("model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_19_cast_fp16 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("b_19_cast_fp16")]; + string var_8540_mode_0 = const()[name = string("op_8540_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_8540_cast_fp16 = gelu(mode = var_8540_mode_0, x = x_159_cast_fp16)[name = string("op_8540_cast_fp16")]; + tensor input_197_cast_fp16 = mul(x = var_8540_cast_fp16, y = b_19_cast_fp16)[name = string("input_197_cast_fp16")]; + string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; + tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; + tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; + int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575382016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581354048))))[name = string("model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_19_cast_fp16 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_197_cast_fp16)[name = string("e_19_cast_fp16")]; + tensor var_8548_axes_0 = const()[name = string("op_8548_axes_0"), val = tensor([2])]; + tensor var_8548_cast_fp16 = squeeze(axes = var_8548_axes_0, x = e_19_cast_fp16)[name = string("op_8548_cast_fp16")]; + tensor var_8549 = const()[name = string("op_8549"), val = tensor([0, 2, 1])]; + int32 var_8560 = const()[name = string("op_8560"), val = int32(-1)]; + fp16 const_426_promoted_to_fp16 = const()[name = string("const_426_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_161_cast_fp16 = transpose(perm = var_8549, x = var_8548_cast_fp16)[name = string("transpose_144")]; + tensor var_8562_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_426_promoted_to_fp16)[name = string("op_8562_cast_fp16")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199_cast_fp16 = concat(axis = var_8560, interleave = input_199_interleave_0, values = (hidden_states_161_cast_fp16, var_8562_cast_fp16))[name = string("input_199_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_8557_to_fp16 = const()[name = string("op_8557_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_8557_to_fp16, x = input_199_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; + tensor var_8576_to_fp16 = const()[name = string("op_8576_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581390976)))]; + tensor hidden_states_163_cast_fp16 = mul(x = normed_239_cast_fp16, y = var_8576_to_fp16)[name = string("hidden_states_163_cast_fp16")]; + tensor hidden_states_165_cast_fp16 = add(x = hidden_states_159_cast_fp16, y = hidden_states_163_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; + int32 var_8630 = const()[name = string("op_8630"), val = int32(-1)]; + fp16 const_431_promoted_to_fp16 = const()[name = string("const_431_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8632_cast_fp16 = mul(x = hidden_states_165_cast_fp16, y = const_431_promoted_to_fp16)[name = string("op_8632_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_8630, interleave = input_201_interleave_0, values = (hidden_states_165_cast_fp16, var_8632_cast_fp16))[name = string("input_201_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_8627_to_fp16 = const()[name = string("op_8627_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_8627_to_fp16, x = input_201_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; + tensor var_8646_to_fp16 = const()[name = string("op_8646_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581393344)))]; + tensor hidden_states_167_cast_fp16 = mul(x = normed_243_cast_fp16, y = var_8646_to_fp16)[name = string("hidden_states_167_cast_fp16")]; + tensor var_8657 = const()[name = string("op_8657"), val = tensor([0, 2, 1])]; + tensor var_8660_axes_0 = const()[name = string("op_8660_axes_0"), val = tensor([2])]; + tensor var_8658_cast_fp16 = transpose(perm = var_8657, x = hidden_states_167_cast_fp16)[name = string("transpose_143")]; + tensor var_8660_cast_fp16 = expand_dims(axes = var_8660_axes_0, x = var_8658_cast_fp16)[name = string("op_8660_cast_fp16")]; + string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; + tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; + tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; + int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; + tensor query_states_81 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_8660_cast_fp16)[name = string("query_states_81")]; + string key_states_101_pad_type_0 = const()[name = string("key_states_101_pad_type_0"), val = string("valid")]; + tensor key_states_101_strides_0 = const()[name = string("key_states_101_strides_0"), val = tensor([1, 1])]; + tensor key_states_101_pad_0 = const()[name = string("key_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_101_dilations_0 = const()[name = string("key_states_101_dilations_0"), val = tensor([1, 1])]; + int32 key_states_101_groups_0 = const()[name = string("key_states_101_groups_0"), val = int32(1)]; + tensor key_states_101 = conv(dilations = key_states_101_dilations_0, groups = key_states_101_groups_0, pad = key_states_101_pad_0, pad_type = key_states_101_pad_type_0, strides = key_states_101_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_8660_cast_fp16)[name = string("key_states_101")]; + string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; + tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; + tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; + int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; + tensor value_states_81 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_8660_cast_fp16)[name = string("value_states_81")]; + tensor var_8702 = const()[name = string("op_8702"), val = tensor([1, 4, 256, 64])]; + tensor var_8703 = reshape(shape = var_8702, x = query_states_81)[name = string("op_8703")]; + tensor var_8708 = const()[name = string("op_8708"), val = tensor([0, 1, 3, 2])]; + tensor var_8713 = const()[name = string("op_8713"), val = tensor([1, 1, 256, 64])]; + tensor var_8714 = reshape(shape = var_8713, x = key_states_101)[name = string("op_8714")]; + tensor var_8719 = const()[name = string("op_8719"), val = tensor([0, 1, 3, 2])]; + tensor var_8724 = const()[name = string("op_8724"), val = tensor([1, 1, 256, 64])]; + tensor var_8725 = reshape(shape = var_8724, x = value_states_81)[name = string("op_8725")]; + tensor var_8730 = const()[name = string("op_8730"), val = tensor([0, 1, 3, 2])]; + int32 var_8741 = const()[name = string("op_8741"), val = int32(-1)]; + fp16 const_436_promoted = const()[name = string("const_436_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_169 = transpose(perm = var_8708, x = var_8703)[name = string("transpose_142")]; + tensor var_8743 = mul(x = hidden_states_169, y = const_436_promoted)[name = string("op_8743")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205 = concat(axis = var_8741, interleave = input_205_interleave_0, values = (hidden_states_169, var_8743))[name = string("input_205")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_8738_to_fp16 = const()[name = string("op_8738_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_8738_to_fp16, x = input_205)[name = string("normed_245_cast_fp16")]; + tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; + tensor var_8757_to_fp16 = const()[name = string("op_8757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581395712)))]; + tensor q_21_cast_fp16 = mul(x = normed_247, y = var_8757_to_fp16)[name = string("q_21_cast_fp16")]; + int32 var_8768 = const()[name = string("op_8768"), val = int32(-1)]; + fp16 const_440_promoted = const()[name = string("const_440_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_171 = transpose(perm = var_8719, x = var_8714)[name = string("transpose_141")]; + tensor var_8770 = mul(x = hidden_states_171, y = const_440_promoted)[name = string("op_8770")]; + bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; + tensor input_207 = concat(axis = var_8768, interleave = input_207_interleave_0, values = (hidden_states_171, var_8770))[name = string("input_207")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_8765_to_fp16 = const()[name = string("op_8765_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_8765_to_fp16, x = input_207)[name = string("normed_249_cast_fp16")]; + tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; + tensor var_8784_to_fp16 = const()[name = string("op_8784_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581396288)))]; + tensor k_21_cast_fp16 = mul(x = normed_251, y = var_8784_to_fp16)[name = string("k_21_cast_fp16")]; + tensor var_8798_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_5)[name = string("op_8798_cast_fp16")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; + fp16 const_446_promoted_to_fp16 = const()[name = string("const_446_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8819_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_446_promoted_to_fp16)[name = string("op_8819_cast_fp16")]; + int32 var_8821 = const()[name = string("op_8821"), val = int32(-1)]; + bool var_8822_interleave_0 = const()[name = string("op_8822_interleave_0"), val = bool(false)]; + tensor var_8822_cast_fp16 = concat(axis = var_8821, interleave = var_8822_interleave_0, values = (var_8819_cast_fp16, x1_41_cast_fp16))[name = string("op_8822_cast_fp16")]; + tensor var_8823_cast_fp16 = mul(x = var_8822_cast_fp16, y = sin_5)[name = string("op_8823_cast_fp16")]; + tensor query_states_83_cast_fp16 = add(x = var_8798_cast_fp16, y = var_8823_cast_fp16)[name = string("query_states_83_cast_fp16")]; + tensor var_8826_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_5)[name = string("op_8826_cast_fp16")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; + fp16 const_449_promoted_to_fp16 = const()[name = string("const_449_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8847_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_449_promoted_to_fp16)[name = string("op_8847_cast_fp16")]; + int32 var_8849 = const()[name = string("op_8849"), val = int32(-1)]; + bool var_8850_interleave_0 = const()[name = string("op_8850_interleave_0"), val = bool(false)]; + tensor var_8850_cast_fp16 = concat(axis = var_8849, interleave = var_8850_interleave_0, values = (var_8847_cast_fp16, x1_43_cast_fp16))[name = string("op_8850_cast_fp16")]; + tensor var_8851_cast_fp16 = mul(x = var_8850_cast_fp16, y = sin_5)[name = string("op_8851_cast_fp16")]; + tensor key_states_103_cast_fp16 = add(x = var_8826_cast_fp16, y = var_8851_cast_fp16)[name = string("key_states_103_cast_fp16")]; + tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([9])]; + tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; + tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; + tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([10])]; + int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; + bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; + tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_182")]; + tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; + tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; + int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; + bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; + tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_124, concat_183_values1_0, end_pos_1, concat_183_values3_0))[name = string("concat_183")]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_19_stride_0, update = key_states_103_cast_fp16, x = coreml_update_state_71)[name = string("model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_124_write_state")]; + tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_124")]; + tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([31])]; + tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; + tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; + tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([32])]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_186")]; + tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; + tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; + int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; + bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; + tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_130, concat_187_values1_0, end_pos_1, concat_187_values3_0))[name = string("concat_187")]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_83 = transpose(perm = var_8730, x = var_8725)[name = string("transpose_140")]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_20_stride_0, update = value_states_83, x = coreml_update_state_72)[name = string("model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_125_write_state")]; + tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_125")]; + tensor var_8950_begin_0 = const()[name = string("op_8950_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_8950_end_0 = const()[name = string("op_8950_end_0"), val = tensor([10, 1, 512, 256])]; + tensor var_8950_end_mask_0 = const()[name = string("op_8950_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8950_cast_fp16 = slice_by_index(begin = var_8950_begin_0, end = var_8950_end_0, end_mask = var_8950_end_mask_0, x = coreml_update_state_73)[name = string("op_8950_cast_fp16")]; + tensor var_8957_begin_0 = const()[name = string("op_8957_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_8957_end_0 = const()[name = string("op_8957_end_0"), val = tensor([32, 1, 512, 256])]; + tensor var_8957_end_mask_0 = const()[name = string("op_8957_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8957_cast_fp16 = slice_by_index(begin = var_8957_begin_0, end = var_8957_end_0, end_mask = var_8957_end_mask_0, x = coreml_update_state_73)[name = string("op_8957_cast_fp16")]; + tensor var_8996 = const()[name = string("op_8996"), val = tensor([1, 4, 1, 1])]; + tensor x_165_cast_fp16 = tile(reps = var_8996, x = var_8950_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor var_9016 = const()[name = string("op_9016"), val = tensor([1, 4, 1, 1])]; + tensor x_171_cast_fp16 = tile(reps = var_9016, x = var_8957_cast_fp16)[name = string("x_171_cast_fp16")]; + bool var_9043_transpose_x_0 = const()[name = string("op_9043_transpose_x_0"), val = bool(false)]; + bool var_9043_transpose_y_0 = const()[name = string("op_9043_transpose_y_0"), val = bool(true)]; + tensor var_9043 = matmul(transpose_x = var_9043_transpose_x_0, transpose_y = var_9043_transpose_y_0, x = query_states_83_cast_fp16, y = x_165_cast_fp16)[name = string("op_9043")]; + fp16 var_9044_to_fp16 = const()[name = string("op_9044_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_41_cast_fp16 = mul(x = var_9043, y = var_9044_to_fp16)[name = string("attn_weights_41_cast_fp16")]; + tensor attn_weights_43_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = mask_slice_1)[name = string("attn_weights_43_cast_fp16")]; + int32 var_9079 = const()[name = string("op_9079"), val = int32(-1)]; + tensor var_9081_cast_fp16 = softmax(axis = var_9079, x = attn_weights_43_cast_fp16)[name = string("op_9081_cast_fp16")]; + tensor concat_192 = const()[name = string("concat_192"), val = tensor([4, 64, 512])]; + tensor reshape_30_cast_fp16 = reshape(shape = concat_192, x = var_9081_cast_fp16)[name = string("reshape_30_cast_fp16")]; + tensor concat_193 = const()[name = string("concat_193"), val = tensor([4, 512, 256])]; + tensor reshape_31_cast_fp16 = reshape(shape = concat_193, x = x_171_cast_fp16)[name = string("reshape_31_cast_fp16")]; + bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; + bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; + tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; + tensor concat_197 = const()[name = string("concat_197"), val = tensor([1, 4, 64, 256])]; + tensor reshape_32_cast_fp16 = reshape(shape = concat_197, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; + tensor var_9093_perm_0 = const()[name = string("op_9093_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9112 = const()[name = string("op_9112"), val = tensor([1, 64, 1024])]; + tensor var_9093_cast_fp16 = transpose(perm = var_9093_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_139")]; + tensor attn_output_105_cast_fp16 = reshape(shape = var_9112, x = var_9093_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_9117 = const()[name = string("op_9117"), val = tensor([0, 2, 1])]; + string var_9133_pad_type_0 = const()[name = string("op_9133_pad_type_0"), val = string("valid")]; + int32 var_9133_groups_0 = const()[name = string("op_9133_groups_0"), val = int32(1)]; + tensor var_9133_strides_0 = const()[name = string("op_9133_strides_0"), val = tensor([1])]; + tensor var_9133_pad_0 = const()[name = string("op_9133_pad_0"), val = tensor([0, 0])]; + tensor var_9133_dilations_0 = const()[name = string("op_9133_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581396864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582281664))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9118_cast_fp16 = transpose(perm = var_9117, x = attn_output_105_cast_fp16)[name = string("transpose_138")]; + tensor var_9133_cast_fp16 = conv(dilations = var_9133_dilations_0, groups = var_9133_groups_0, pad = var_9133_pad_0, pad_type = var_9133_pad_type_0, strides = var_9133_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_9118_cast_fp16)[name = string("op_9133_cast_fp16")]; + tensor var_9137 = const()[name = string("op_9137"), val = tensor([0, 2, 1])]; + int32 var_9148 = const()[name = string("op_9148"), val = int32(-1)]; + fp16 const_461_promoted_to_fp16 = const()[name = string("const_461_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_173_cast_fp16 = transpose(perm = var_9137, x = var_9133_cast_fp16)[name = string("transpose_137")]; + tensor var_9150_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = const_461_promoted_to_fp16)[name = string("op_9150_cast_fp16")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211_cast_fp16 = concat(axis = var_9148, interleave = input_211_interleave_0, values = (hidden_states_173_cast_fp16, var_9150_cast_fp16))[name = string("input_211_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_9145_to_fp16 = const()[name = string("op_9145_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_9145_to_fp16, x = input_211_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; + tensor var_9164_to_fp16 = const()[name = string("op_9164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582318592)))]; + tensor attn_output_109_cast_fp16 = mul(x = normed_255_cast_fp16, y = var_9164_to_fp16)[name = string("attn_output_109_cast_fp16")]; + tensor hidden_states_175_cast_fp16 = add(x = hidden_states_165_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_175_cast_fp16")]; + int32 var_9177 = const()[name = string("op_9177"), val = int32(-1)]; + fp16 const_465_promoted_to_fp16 = const()[name = string("const_465_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9179_cast_fp16 = mul(x = hidden_states_175_cast_fp16, y = const_465_promoted_to_fp16)[name = string("op_9179_cast_fp16")]; + bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; + tensor input_213_cast_fp16 = concat(axis = var_9177, interleave = input_213_interleave_0, values = (hidden_states_175_cast_fp16, var_9179_cast_fp16))[name = string("input_213_cast_fp16")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_9174_to_fp16 = const()[name = string("op_9174_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_9174_to_fp16, x = input_213_cast_fp16)[name = string("normed_257_cast_fp16")]; + tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; + tensor var_9193_to_fp16 = const()[name = string("op_9193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582320960)))]; + tensor x_173_cast_fp16 = mul(x = normed_259_cast_fp16, y = var_9193_to_fp16)[name = string("x_173_cast_fp16")]; + tensor var_9205 = const()[name = string("op_9205"), val = tensor([0, 2, 1])]; + tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; + tensor var_9206_cast_fp16 = transpose(perm = var_9205, x = x_173_cast_fp16)[name = string("transpose_136")]; + tensor input_215_cast_fp16 = expand_dims(axes = input_215_axes_0, x = var_9206_cast_fp16)[name = string("input_215_cast_fp16")]; + string x_175_pad_type_0 = const()[name = string("x_175_pad_type_0"), val = string("valid")]; + tensor x_175_strides_0 = const()[name = string("x_175_strides_0"), val = tensor([1, 1])]; + tensor x_175_pad_0 = const()[name = string("x_175_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_175_dilations_0 = const()[name = string("x_175_dilations_0"), val = tensor([1, 1])]; + int32 x_175_groups_0 = const()[name = string("x_175_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582323328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588295360))))[name = string("model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("x_175_cast_fp16")]; + string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; + tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; + tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; + int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588516608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594488640))))[name = string("model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_21_cast_fp16 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("b_21_cast_fp16")]; + string var_9231_mode_0 = const()[name = string("op_9231_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_9231_cast_fp16 = gelu(mode = var_9231_mode_0, x = x_175_cast_fp16)[name = string("op_9231_cast_fp16")]; + tensor input_217_cast_fp16 = mul(x = var_9231_cast_fp16, y = b_21_cast_fp16)[name = string("input_217_cast_fp16")]; + string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; + tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; + tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; + int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594709888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600681920))))[name = string("model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_21_cast_fp16 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_217_cast_fp16)[name = string("e_21_cast_fp16")]; + tensor var_9239_axes_0 = const()[name = string("op_9239_axes_0"), val = tensor([2])]; + tensor var_9239_cast_fp16 = squeeze(axes = var_9239_axes_0, x = e_21_cast_fp16)[name = string("op_9239_cast_fp16")]; + tensor var_9240 = const()[name = string("op_9240"), val = tensor([0, 2, 1])]; + int32 var_9251 = const()[name = string("op_9251"), val = int32(-1)]; + fp16 const_469_promoted_to_fp16 = const()[name = string("const_469_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_177_cast_fp16 = transpose(perm = var_9240, x = var_9239_cast_fp16)[name = string("transpose_135")]; + tensor var_9253_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = const_469_promoted_to_fp16)[name = string("op_9253_cast_fp16")]; + bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; + tensor input_219_cast_fp16 = concat(axis = var_9251, interleave = input_219_interleave_0, values = (hidden_states_177_cast_fp16, var_9253_cast_fp16))[name = string("input_219_cast_fp16")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_9248_to_fp16 = const()[name = string("op_9248_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_9248_to_fp16, x = input_219_cast_fp16)[name = string("normed_261_cast_fp16")]; + tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_263_cast_fp16 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263_cast_fp16")]; + tensor var_9267_to_fp16 = const()[name = string("op_9267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600718848)))]; + tensor hidden_states_179_cast_fp16 = mul(x = normed_263_cast_fp16, y = var_9267_to_fp16)[name = string("hidden_states_179_cast_fp16")]; + tensor hidden_states_181_cast_fp16 = add(x = hidden_states_175_cast_fp16, y = hidden_states_179_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; + int32 var_9321 = const()[name = string("op_9321"), val = int32(-1)]; + fp16 const_474_promoted_to_fp16 = const()[name = string("const_474_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9323_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = const_474_promoted_to_fp16)[name = string("op_9323_cast_fp16")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221_cast_fp16 = concat(axis = var_9321, interleave = input_221_interleave_0, values = (hidden_states_181_cast_fp16, var_9323_cast_fp16))[name = string("input_221_cast_fp16")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_9318_to_fp16 = const()[name = string("op_9318_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_9318_to_fp16, x = input_221_cast_fp16)[name = string("normed_265_cast_fp16")]; + tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_267_cast_fp16 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267_cast_fp16")]; + tensor var_9337_to_fp16 = const()[name = string("op_9337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600721216)))]; + tensor hidden_states_183_cast_fp16 = mul(x = normed_267_cast_fp16, y = var_9337_to_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor var_9348 = const()[name = string("op_9348"), val = tensor([0, 2, 1])]; + tensor var_9351_axes_0 = const()[name = string("op_9351_axes_0"), val = tensor([2])]; + tensor var_9349_cast_fp16 = transpose(perm = var_9348, x = hidden_states_183_cast_fp16)[name = string("transpose_134")]; + tensor var_9351_cast_fp16 = expand_dims(axes = var_9351_axes_0, x = var_9349_cast_fp16)[name = string("op_9351_cast_fp16")]; + string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; + tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; + tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; + int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; + tensor query_states_89 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_9351_cast_fp16)[name = string("query_states_89")]; + string key_states_111_pad_type_0 = const()[name = string("key_states_111_pad_type_0"), val = string("valid")]; + tensor key_states_111_strides_0 = const()[name = string("key_states_111_strides_0"), val = tensor([1, 1])]; + tensor key_states_111_pad_0 = const()[name = string("key_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_111_dilations_0 = const()[name = string("key_states_111_dilations_0"), val = tensor([1, 1])]; + int32 key_states_111_groups_0 = const()[name = string("key_states_111_groups_0"), val = int32(1)]; + tensor key_states_111 = conv(dilations = key_states_111_dilations_0, groups = key_states_111_groups_0, pad = key_states_111_pad_0, pad_type = key_states_111_pad_type_0, strides = key_states_111_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_9351_cast_fp16)[name = string("key_states_111")]; + string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; + tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; + tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; + int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; + tensor value_states_89 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_9351_cast_fp16)[name = string("value_states_89")]; + tensor var_9393 = const()[name = string("op_9393"), val = tensor([1, 4, 256, 64])]; + tensor var_9394 = reshape(shape = var_9393, x = query_states_89)[name = string("op_9394")]; + tensor var_9399 = const()[name = string("op_9399"), val = tensor([0, 1, 3, 2])]; + tensor var_9404 = const()[name = string("op_9404"), val = tensor([1, 1, 256, 64])]; + tensor var_9405 = reshape(shape = var_9404, x = key_states_111)[name = string("op_9405")]; + tensor var_9410 = const()[name = string("op_9410"), val = tensor([0, 1, 3, 2])]; + tensor var_9415 = const()[name = string("op_9415"), val = tensor([1, 1, 256, 64])]; + tensor var_9416 = reshape(shape = var_9415, x = value_states_89)[name = string("op_9416")]; + tensor var_9421 = const()[name = string("op_9421"), val = tensor([0, 1, 3, 2])]; + int32 var_9432 = const()[name = string("op_9432"), val = int32(-1)]; + fp16 const_479_promoted = const()[name = string("const_479_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_185 = transpose(perm = var_9399, x = var_9394)[name = string("transpose_133")]; + tensor var_9434 = mul(x = hidden_states_185, y = const_479_promoted)[name = string("op_9434")]; + bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; + tensor input_225 = concat(axis = var_9432, interleave = input_225_interleave_0, values = (hidden_states_185, var_9434))[name = string("input_225")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_9429_to_fp16 = const()[name = string("op_9429_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_9429_to_fp16, x = input_225)[name = string("normed_269_cast_fp16")]; + tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_271 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271")]; + tensor var_9448_to_fp16 = const()[name = string("op_9448_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600723584)))]; + tensor q_23_cast_fp16 = mul(x = normed_271, y = var_9448_to_fp16)[name = string("q_23_cast_fp16")]; + int32 var_9459 = const()[name = string("op_9459"), val = int32(-1)]; + fp16 const_483_promoted = const()[name = string("const_483_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_187 = transpose(perm = var_9410, x = var_9405)[name = string("transpose_132")]; + tensor var_9461 = mul(x = hidden_states_187, y = const_483_promoted)[name = string("op_9461")]; + bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; + tensor input_227 = concat(axis = var_9459, interleave = input_227_interleave_0, values = (hidden_states_187, var_9461))[name = string("input_227")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_9456_to_fp16 = const()[name = string("op_9456_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_9456_to_fp16, x = input_227)[name = string("normed_273_cast_fp16")]; + tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_275 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275")]; + tensor var_9475_to_fp16 = const()[name = string("op_9475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600724160)))]; + tensor k_23_cast_fp16 = mul(x = normed_275, y = var_9475_to_fp16)[name = string("k_23_cast_fp16")]; + tensor var_9489_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_35)[name = string("op_9489_cast_fp16")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; + fp16 const_489_promoted_to_fp16 = const()[name = string("const_489_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9510_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_489_promoted_to_fp16)[name = string("op_9510_cast_fp16")]; + int32 var_9512 = const()[name = string("op_9512"), val = int32(-1)]; + bool var_9513_interleave_0 = const()[name = string("op_9513_interleave_0"), val = bool(false)]; + tensor var_9513_cast_fp16 = concat(axis = var_9512, interleave = var_9513_interleave_0, values = (var_9510_cast_fp16, x1_45_cast_fp16))[name = string("op_9513_cast_fp16")]; + tensor var_9514_cast_fp16 = mul(x = var_9513_cast_fp16, y = sin_35)[name = string("op_9514_cast_fp16")]; + tensor query_states_91_cast_fp16 = add(x = var_9489_cast_fp16, y = var_9514_cast_fp16)[name = string("query_states_91_cast_fp16")]; + tensor var_9517_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_35)[name = string("op_9517_cast_fp16")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; + fp16 const_492_promoted_to_fp16 = const()[name = string("const_492_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9538_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_492_promoted_to_fp16)[name = string("op_9538_cast_fp16")]; + int32 var_9540 = const()[name = string("op_9540"), val = int32(-1)]; + bool var_9541_interleave_0 = const()[name = string("op_9541_interleave_0"), val = bool(false)]; + tensor var_9541_cast_fp16 = concat(axis = var_9540, interleave = var_9541_interleave_0, values = (var_9538_cast_fp16, x1_47_cast_fp16))[name = string("op_9541_cast_fp16")]; + tensor var_9542_cast_fp16 = mul(x = var_9541_cast_fp16, y = sin_35)[name = string("op_9542_cast_fp16")]; + tensor key_states_113_cast_fp16 = add(x = var_9517_cast_fp16, y = var_9542_cast_fp16)[name = string("key_states_113_cast_fp16")]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_3_stride_0, update = key_states_113_cast_fp16, x = coreml_update_state_63)[name = string("model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_126_write_state")]; + tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_126")]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_91 = transpose(perm = var_9421, x = var_9416)[name = string("transpose_131")]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_4_stride_0, update = value_states_91, x = coreml_update_state_74)[name = string("model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_127_write_state")]; + tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_127")]; + tensor var_9641_begin_0 = const()[name = string("op_9641_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_9641_end_0 = const()[name = string("op_9641_end_0"), val = tensor([2, 1, 4096, 256])]; + tensor var_9641_end_mask_0 = const()[name = string("op_9641_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9641_cast_fp16 = slice_by_index(begin = var_9641_begin_0, end = var_9641_end_0, end_mask = var_9641_end_mask_0, x = coreml_update_state_75)[name = string("op_9641_cast_fp16")]; + tensor var_9648_begin_0 = const()[name = string("op_9648_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_9648_end_0 = const()[name = string("op_9648_end_0"), val = tensor([6, 1, 4096, 256])]; + tensor var_9648_end_mask_0 = const()[name = string("op_9648_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9648_cast_fp16 = slice_by_index(begin = var_9648_begin_0, end = var_9648_end_0, end_mask = var_9648_end_mask_0, x = coreml_update_state_75)[name = string("op_9648_cast_fp16")]; + tensor var_9687 = const()[name = string("op_9687"), val = tensor([1, 4, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_9687, x = var_9641_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_9707 = const()[name = string("op_9707"), val = tensor([1, 4, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_9707, x = var_9648_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_9734_transpose_x_0 = const()[name = string("op_9734_transpose_x_0"), val = bool(false)]; + bool var_9734_transpose_y_0 = const()[name = string("op_9734_transpose_y_0"), val = bool(true)]; + tensor var_9734 = matmul(transpose_x = var_9734_transpose_x_0, transpose_y = var_9734_transpose_y_0, x = query_states_91_cast_fp16, y = x_181_cast_fp16)[name = string("op_9734")]; + fp16 var_9735_to_fp16 = const()[name = string("op_9735_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_45_cast_fp16 = mul(x = var_9734, y = var_9735_to_fp16)[name = string("attn_weights_45_cast_fp16")]; + tensor attn_weights_47_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("attn_weights_47_cast_fp16")]; + int32 var_9770 = const()[name = string("op_9770"), val = int32(-1)]; + tensor var_9772_cast_fp16 = softmax(axis = var_9770, x = attn_weights_47_cast_fp16)[name = string("op_9772_cast_fp16")]; + tensor concat_210 = const()[name = string("concat_210"), val = tensor([4, 64, 4096])]; + tensor reshape_33_cast_fp16 = reshape(shape = concat_210, x = var_9772_cast_fp16)[name = string("reshape_33_cast_fp16")]; + tensor concat_211 = const()[name = string("concat_211"), val = tensor([4, 4096, 256])]; + tensor reshape_34_cast_fp16 = reshape(shape = concat_211, x = x_187_cast_fp16)[name = string("reshape_34_cast_fp16")]; + bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; + bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; + tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; + tensor concat_215 = const()[name = string("concat_215"), val = tensor([1, 4, 64, 256])]; + tensor reshape_35_cast_fp16 = reshape(shape = concat_215, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; + tensor var_9784_perm_0 = const()[name = string("op_9784_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9803 = const()[name = string("op_9803"), val = tensor([1, 64, 1024])]; + tensor var_9784_cast_fp16 = transpose(perm = var_9784_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_130")]; + tensor attn_output_115_cast_fp16 = reshape(shape = var_9803, x = var_9784_cast_fp16)[name = string("attn_output_115_cast_fp16")]; + tensor var_9808 = const()[name = string("op_9808"), val = tensor([0, 2, 1])]; + string var_9824_pad_type_0 = const()[name = string("op_9824_pad_type_0"), val = string("valid")]; + int32 var_9824_groups_0 = const()[name = string("op_9824_groups_0"), val = int32(1)]; + tensor var_9824_strides_0 = const()[name = string("op_9824_strides_0"), val = tensor([1])]; + tensor var_9824_pad_0 = const()[name = string("op_9824_pad_0"), val = tensor([0, 0])]; + tensor var_9824_dilations_0 = const()[name = string("op_9824_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600724736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601609536))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9809_cast_fp16 = transpose(perm = var_9808, x = attn_output_115_cast_fp16)[name = string("transpose_129")]; + tensor var_9824_cast_fp16 = conv(dilations = var_9824_dilations_0, groups = var_9824_groups_0, pad = var_9824_pad_0, pad_type = var_9824_pad_type_0, strides = var_9824_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_9809_cast_fp16)[name = string("op_9824_cast_fp16")]; + tensor var_9828 = const()[name = string("op_9828"), val = tensor([0, 2, 1])]; + int32 var_9839 = const()[name = string("op_9839"), val = int32(-1)]; + fp16 const_504_promoted_to_fp16 = const()[name = string("const_504_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_189_cast_fp16 = transpose(perm = var_9828, x = var_9824_cast_fp16)[name = string("transpose_128")]; + tensor var_9841_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_504_promoted_to_fp16)[name = string("op_9841_cast_fp16")]; + bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; + tensor input_231_cast_fp16 = concat(axis = var_9839, interleave = input_231_interleave_0, values = (hidden_states_189_cast_fp16, var_9841_cast_fp16))[name = string("input_231_cast_fp16")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_9836_to_fp16 = const()[name = string("op_9836_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_9836_to_fp16, x = input_231_cast_fp16)[name = string("normed_277_cast_fp16")]; + tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_279_cast_fp16 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279_cast_fp16")]; + tensor var_9855_to_fp16 = const()[name = string("op_9855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601646464)))]; + tensor attn_output_119_cast_fp16 = mul(x = normed_279_cast_fp16, y = var_9855_to_fp16)[name = string("attn_output_119_cast_fp16")]; + tensor hidden_states_191_cast_fp16 = add(x = hidden_states_181_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + int32 var_9868 = const()[name = string("op_9868"), val = int32(-1)]; + fp16 const_508_promoted_to_fp16 = const()[name = string("const_508_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9870_cast_fp16 = mul(x = hidden_states_191_cast_fp16, y = const_508_promoted_to_fp16)[name = string("op_9870_cast_fp16")]; + bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; + tensor input_233_cast_fp16 = concat(axis = var_9868, interleave = input_233_interleave_0, values = (hidden_states_191_cast_fp16, var_9870_cast_fp16))[name = string("input_233_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_9865_to_fp16 = const()[name = string("op_9865_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_9865_to_fp16, x = input_233_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_283_cast_fp16 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283_cast_fp16")]; + tensor var_9884_to_fp16 = const()[name = string("op_9884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601648832)))]; + tensor x_189_cast_fp16 = mul(x = normed_283_cast_fp16, y = var_9884_to_fp16)[name = string("x_189_cast_fp16")]; + tensor var_9896 = const()[name = string("op_9896"), val = tensor([0, 2, 1])]; + tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; + tensor var_9897_cast_fp16 = transpose(perm = var_9896, x = x_189_cast_fp16)[name = string("transpose_127")]; + tensor input_235_cast_fp16 = expand_dims(axes = input_235_axes_0, x = var_9897_cast_fp16)[name = string("input_235_cast_fp16")]; + string x_191_pad_type_0 = const()[name = string("x_191_pad_type_0"), val = string("valid")]; + tensor x_191_strides_0 = const()[name = string("x_191_strides_0"), val = tensor([1, 1])]; + tensor x_191_pad_0 = const()[name = string("x_191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_191_dilations_0 = const()[name = string("x_191_dilations_0"), val = tensor([1, 1])]; + int32 x_191_groups_0 = const()[name = string("x_191_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601651200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607623232))))[name = string("model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("x_191_cast_fp16")]; + string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; + tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; + tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; + int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607844480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613816512))))[name = string("model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_23_cast_fp16 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("b_23_cast_fp16")]; + string var_9922_mode_0 = const()[name = string("op_9922_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_9922_cast_fp16 = gelu(mode = var_9922_mode_0, x = x_191_cast_fp16)[name = string("op_9922_cast_fp16")]; + tensor input_237_cast_fp16 = mul(x = var_9922_cast_fp16, y = b_23_cast_fp16)[name = string("input_237_cast_fp16")]; + string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; + tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; + tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; + int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614037760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620009792))))[name = string("model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_23_cast_fp16 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("e_23_cast_fp16")]; + tensor var_9930_axes_0 = const()[name = string("op_9930_axes_0"), val = tensor([2])]; + tensor var_9930_cast_fp16 = squeeze(axes = var_9930_axes_0, x = e_23_cast_fp16)[name = string("op_9930_cast_fp16")]; + tensor var_9931 = const()[name = string("op_9931"), val = tensor([0, 2, 1])]; + int32 var_9942 = const()[name = string("op_9942"), val = int32(-1)]; + fp16 const_512_promoted_to_fp16 = const()[name = string("const_512_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_193_cast_fp16 = transpose(perm = var_9931, x = var_9930_cast_fp16)[name = string("transpose_126")]; + tensor var_9944_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = const_512_promoted_to_fp16)[name = string("op_9944_cast_fp16")]; + bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; + tensor input_239_cast_fp16 = concat(axis = var_9942, interleave = input_239_interleave_0, values = (hidden_states_193_cast_fp16, var_9944_cast_fp16))[name = string("input_239_cast_fp16")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_9939_to_fp16 = const()[name = string("op_9939_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_9939_to_fp16, x = input_239_cast_fp16)[name = string("normed_285_cast_fp16")]; + tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; + tensor var_9958_to_fp16 = const()[name = string("op_9958_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620046720)))]; + tensor hidden_states_195_cast_fp16 = mul(x = normed_287_cast_fp16, y = var_9958_to_fp16)[name = string("hidden_states_195_cast_fp16")]; + tensor hidden_states_197_cast_fp16 = add(x = hidden_states_191_cast_fp16, y = hidden_states_195_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; + int32 var_10012 = const()[name = string("op_10012"), val = int32(-1)]; + fp16 const_517_promoted_to_fp16 = const()[name = string("const_517_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10014_cast_fp16 = mul(x = hidden_states_197_cast_fp16, y = const_517_promoted_to_fp16)[name = string("op_10014_cast_fp16")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241_cast_fp16 = concat(axis = var_10012, interleave = input_241_interleave_0, values = (hidden_states_197_cast_fp16, var_10014_cast_fp16))[name = string("input_241_cast_fp16")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_10009_to_fp16 = const()[name = string("op_10009_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_10009_to_fp16, x = input_241_cast_fp16)[name = string("normed_289_cast_fp16")]; + tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; + tensor var_10028_to_fp16 = const()[name = string("op_10028_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620049088)))]; + tensor hidden_states_199_cast_fp16 = mul(x = normed_291_cast_fp16, y = var_10028_to_fp16)[name = string("hidden_states_199_cast_fp16")]; + tensor var_10039 = const()[name = string("op_10039"), val = tensor([0, 2, 1])]; + tensor var_10042_axes_0 = const()[name = string("op_10042_axes_0"), val = tensor([2])]; + tensor var_10040_cast_fp16 = transpose(perm = var_10039, x = hidden_states_199_cast_fp16)[name = string("transpose_125")]; + tensor var_10042_cast_fp16 = expand_dims(axes = var_10042_axes_0, x = var_10040_cast_fp16)[name = string("op_10042_cast_fp16")]; + string query_states_97_pad_type_0 = const()[name = string("query_states_97_pad_type_0"), val = string("valid")]; + tensor query_states_97_strides_0 = const()[name = string("query_states_97_strides_0"), val = tensor([1, 1])]; + tensor query_states_97_pad_0 = const()[name = string("query_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_97_dilations_0 = const()[name = string("query_states_97_dilations_0"), val = tensor([1, 1])]; + int32 query_states_97_groups_0 = const()[name = string("query_states_97_groups_0"), val = int32(1)]; + tensor query_states_97 = conv(dilations = query_states_97_dilations_0, groups = query_states_97_groups_0, pad = query_states_97_pad_0, pad_type = query_states_97_pad_type_0, strides = query_states_97_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_10042_cast_fp16)[name = string("query_states_97")]; + string key_states_121_pad_type_0 = const()[name = string("key_states_121_pad_type_0"), val = string("valid")]; + tensor key_states_121_strides_0 = const()[name = string("key_states_121_strides_0"), val = tensor([1, 1])]; + tensor key_states_121_pad_0 = const()[name = string("key_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_121_dilations_0 = const()[name = string("key_states_121_dilations_0"), val = tensor([1, 1])]; + int32 key_states_121_groups_0 = const()[name = string("key_states_121_groups_0"), val = int32(1)]; + tensor key_states_121 = conv(dilations = key_states_121_dilations_0, groups = key_states_121_groups_0, pad = key_states_121_pad_0, pad_type = key_states_121_pad_type_0, strides = key_states_121_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_10042_cast_fp16)[name = string("key_states_121")]; + string value_states_97_pad_type_0 = const()[name = string("value_states_97_pad_type_0"), val = string("valid")]; + tensor value_states_97_strides_0 = const()[name = string("value_states_97_strides_0"), val = tensor([1, 1])]; + tensor value_states_97_pad_0 = const()[name = string("value_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_97_dilations_0 = const()[name = string("value_states_97_dilations_0"), val = tensor([1, 1])]; + int32 value_states_97_groups_0 = const()[name = string("value_states_97_groups_0"), val = int32(1)]; + tensor value_states_97 = conv(dilations = value_states_97_dilations_0, groups = value_states_97_groups_0, pad = value_states_97_pad_0, pad_type = value_states_97_pad_type_0, strides = value_states_97_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_10042_cast_fp16)[name = string("value_states_97")]; + tensor var_10084 = const()[name = string("op_10084"), val = tensor([1, 4, 256, 64])]; + tensor var_10085 = reshape(shape = var_10084, x = query_states_97)[name = string("op_10085")]; + tensor var_10090 = const()[name = string("op_10090"), val = tensor([0, 1, 3, 2])]; + tensor var_10095 = const()[name = string("op_10095"), val = tensor([1, 1, 256, 64])]; + tensor var_10096 = reshape(shape = var_10095, x = key_states_121)[name = string("op_10096")]; + tensor var_10101 = const()[name = string("op_10101"), val = tensor([0, 1, 3, 2])]; + tensor var_10106 = const()[name = string("op_10106"), val = tensor([1, 1, 256, 64])]; + tensor var_10107 = reshape(shape = var_10106, x = value_states_97)[name = string("op_10107")]; + tensor var_10112 = const()[name = string("op_10112"), val = tensor([0, 1, 3, 2])]; + int32 var_10123 = const()[name = string("op_10123"), val = int32(-1)]; + fp16 const_522_promoted = const()[name = string("const_522_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_201 = transpose(perm = var_10090, x = var_10085)[name = string("transpose_124")]; + tensor var_10125 = mul(x = hidden_states_201, y = const_522_promoted)[name = string("op_10125")]; + bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; + tensor input_245 = concat(axis = var_10123, interleave = input_245_interleave_0, values = (hidden_states_201, var_10125))[name = string("input_245")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_10120_to_fp16 = const()[name = string("op_10120_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_10120_to_fp16, x = input_245)[name = string("normed_293_cast_fp16")]; + tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; + tensor var_10139_to_fp16 = const()[name = string("op_10139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620051456)))]; + tensor q_25_cast_fp16 = mul(x = normed_295, y = var_10139_to_fp16)[name = string("q_25_cast_fp16")]; + int32 var_10150 = const()[name = string("op_10150"), val = int32(-1)]; + fp16 const_526_promoted = const()[name = string("const_526_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_203 = transpose(perm = var_10101, x = var_10096)[name = string("transpose_123")]; + tensor var_10152 = mul(x = hidden_states_203, y = const_526_promoted)[name = string("op_10152")]; + bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; + tensor input_247 = concat(axis = var_10150, interleave = input_247_interleave_0, values = (hidden_states_203, var_10152))[name = string("input_247")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_10147_to_fp16 = const()[name = string("op_10147_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_10147_to_fp16, x = input_247)[name = string("normed_297_cast_fp16")]; + tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; + tensor var_10166_to_fp16 = const()[name = string("op_10166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620052032)))]; + tensor k_25_cast_fp16 = mul(x = normed_299, y = var_10166_to_fp16)[name = string("k_25_cast_fp16")]; + tensor var_10180_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_5)[name = string("op_10180_cast_fp16")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; + fp16 const_532_promoted_to_fp16 = const()[name = string("const_532_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10201_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_532_promoted_to_fp16)[name = string("op_10201_cast_fp16")]; + int32 var_10203 = const()[name = string("op_10203"), val = int32(-1)]; + bool var_10204_interleave_0 = const()[name = string("op_10204_interleave_0"), val = bool(false)]; + tensor var_10204_cast_fp16 = concat(axis = var_10203, interleave = var_10204_interleave_0, values = (var_10201_cast_fp16, x1_49_cast_fp16))[name = string("op_10204_cast_fp16")]; + tensor var_10205_cast_fp16 = mul(x = var_10204_cast_fp16, y = sin_5)[name = string("op_10205_cast_fp16")]; + tensor query_states_99_cast_fp16 = add(x = var_10180_cast_fp16, y = var_10205_cast_fp16)[name = string("query_states_99_cast_fp16")]; + tensor var_10208_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_5)[name = string("op_10208_cast_fp16")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; + fp16 const_535_promoted_to_fp16 = const()[name = string("const_535_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10229_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_535_promoted_to_fp16)[name = string("op_10229_cast_fp16")]; + int32 var_10231 = const()[name = string("op_10231"), val = int32(-1)]; + bool var_10232_interleave_0 = const()[name = string("op_10232_interleave_0"), val = bool(false)]; + tensor var_10232_cast_fp16 = concat(axis = var_10231, interleave = var_10232_interleave_0, values = (var_10229_cast_fp16, x1_51_cast_fp16))[name = string("op_10232_cast_fp16")]; + tensor var_10233_cast_fp16 = mul(x = var_10232_cast_fp16, y = sin_5)[name = string("op_10233_cast_fp16")]; + tensor key_states_123_cast_fp16 = add(x = var_10208_cast_fp16, y = var_10233_cast_fp16)[name = string("key_states_123_cast_fp16")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([10])]; + tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; + tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; + tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([11])]; + int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; + bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; + tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_218")]; + tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; + tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; + int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; + bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; + tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_148, concat_219_values1_0, end_pos_1, concat_219_values3_0))[name = string("concat_219")]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_21_stride_0, update = key_states_123_cast_fp16, x = coreml_update_state_73)[name = string("model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_128_write_state")]; + tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_128")]; + tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([32])]; + tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; + tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; + tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([33])]; + int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; + bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; + tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_222")]; + tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; + tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; + int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; + bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; + tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_154, concat_223_values1_0, end_pos_1, concat_223_values3_0))[name = string("concat_223")]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_99 = transpose(perm = var_10112, x = var_10107)[name = string("transpose_122")]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_22_stride_0, update = value_states_99, x = coreml_update_state_76)[name = string("model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_129_write_state")]; + tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_129")]; + tensor var_10332_begin_0 = const()[name = string("op_10332_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor var_10332_end_0 = const()[name = string("op_10332_end_0"), val = tensor([11, 1, 512, 256])]; + tensor var_10332_end_mask_0 = const()[name = string("op_10332_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10332_cast_fp16 = slice_by_index(begin = var_10332_begin_0, end = var_10332_end_0, end_mask = var_10332_end_mask_0, x = coreml_update_state_77)[name = string("op_10332_cast_fp16")]; + tensor var_10339_begin_0 = const()[name = string("op_10339_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_10339_end_0 = const()[name = string("op_10339_end_0"), val = tensor([33, 1, 512, 256])]; + tensor var_10339_end_mask_0 = const()[name = string("op_10339_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10339_cast_fp16 = slice_by_index(begin = var_10339_begin_0, end = var_10339_end_0, end_mask = var_10339_end_mask_0, x = coreml_update_state_77)[name = string("op_10339_cast_fp16")]; + tensor var_10378 = const()[name = string("op_10378"), val = tensor([1, 4, 1, 1])]; + tensor x_197_cast_fp16 = tile(reps = var_10378, x = var_10332_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_10398 = const()[name = string("op_10398"), val = tensor([1, 4, 1, 1])]; + tensor x_203_cast_fp16 = tile(reps = var_10398, x = var_10339_cast_fp16)[name = string("x_203_cast_fp16")]; + bool var_10425_transpose_x_0 = const()[name = string("op_10425_transpose_x_0"), val = bool(false)]; + bool var_10425_transpose_y_0 = const()[name = string("op_10425_transpose_y_0"), val = bool(true)]; + tensor var_10425 = matmul(transpose_x = var_10425_transpose_x_0, transpose_y = var_10425_transpose_y_0, x = query_states_99_cast_fp16, y = x_197_cast_fp16)[name = string("op_10425")]; + fp16 var_10426_to_fp16 = const()[name = string("op_10426_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_49_cast_fp16 = mul(x = var_10425, y = var_10426_to_fp16)[name = string("attn_weights_49_cast_fp16")]; + tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = mask_slice_1)[name = string("attn_weights_51_cast_fp16")]; + int32 var_10461 = const()[name = string("op_10461"), val = int32(-1)]; + tensor var_10463_cast_fp16 = softmax(axis = var_10461, x = attn_weights_51_cast_fp16)[name = string("op_10463_cast_fp16")]; + tensor concat_228 = const()[name = string("concat_228"), val = tensor([4, 64, 512])]; + tensor reshape_36_cast_fp16 = reshape(shape = concat_228, x = var_10463_cast_fp16)[name = string("reshape_36_cast_fp16")]; + tensor concat_229 = const()[name = string("concat_229"), val = tensor([4, 512, 256])]; + tensor reshape_37_cast_fp16 = reshape(shape = concat_229, x = x_203_cast_fp16)[name = string("reshape_37_cast_fp16")]; + bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; + bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; + tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; + tensor concat_233 = const()[name = string("concat_233"), val = tensor([1, 4, 64, 256])]; + tensor reshape_38_cast_fp16 = reshape(shape = concat_233, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; + tensor var_10475_perm_0 = const()[name = string("op_10475_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10494 = const()[name = string("op_10494"), val = tensor([1, 64, 1024])]; + tensor var_10475_cast_fp16 = transpose(perm = var_10475_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_121")]; + tensor attn_output_125_cast_fp16 = reshape(shape = var_10494, x = var_10475_cast_fp16)[name = string("attn_output_125_cast_fp16")]; + tensor var_10499 = const()[name = string("op_10499"), val = tensor([0, 2, 1])]; + string var_10515_pad_type_0 = const()[name = string("op_10515_pad_type_0"), val = string("valid")]; + int32 var_10515_groups_0 = const()[name = string("op_10515_groups_0"), val = int32(1)]; + tensor var_10515_strides_0 = const()[name = string("op_10515_strides_0"), val = tensor([1])]; + tensor var_10515_pad_0 = const()[name = string("op_10515_pad_0"), val = tensor([0, 0])]; + tensor var_10515_dilations_0 = const()[name = string("op_10515_dilations_0"), val = tensor([1])]; + tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620052608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620937408))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10500_cast_fp16 = transpose(perm = var_10499, x = attn_output_125_cast_fp16)[name = string("transpose_120")]; + tensor var_10515_cast_fp16 = conv(dilations = var_10515_dilations_0, groups = var_10515_groups_0, pad = var_10515_pad_0, pad_type = var_10515_pad_type_0, strides = var_10515_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_10500_cast_fp16)[name = string("op_10515_cast_fp16")]; + tensor var_10519 = const()[name = string("op_10519"), val = tensor([0, 2, 1])]; + int32 var_10530 = const()[name = string("op_10530"), val = int32(-1)]; + fp16 const_547_promoted_to_fp16 = const()[name = string("const_547_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_205_cast_fp16 = transpose(perm = var_10519, x = var_10515_cast_fp16)[name = string("transpose_119")]; + tensor var_10532_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = const_547_promoted_to_fp16)[name = string("op_10532_cast_fp16")]; + bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; + tensor input_251_cast_fp16 = concat(axis = var_10530, interleave = input_251_interleave_0, values = (hidden_states_205_cast_fp16, var_10532_cast_fp16))[name = string("input_251_cast_fp16")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_10527_to_fp16 = const()[name = string("op_10527_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_10527_to_fp16, x = input_251_cast_fp16)[name = string("normed_301_cast_fp16")]; + tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; + tensor var_10546_to_fp16 = const()[name = string("op_10546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620974336)))]; + tensor attn_output_129_cast_fp16 = mul(x = normed_303_cast_fp16, y = var_10546_to_fp16)[name = string("attn_output_129_cast_fp16")]; + tensor hidden_states_207_cast_fp16 = add(x = hidden_states_197_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_207_cast_fp16")]; + int32 var_10559 = const()[name = string("op_10559"), val = int32(-1)]; + fp16 const_551_promoted_to_fp16 = const()[name = string("const_551_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10561_cast_fp16 = mul(x = hidden_states_207_cast_fp16, y = const_551_promoted_to_fp16)[name = string("op_10561_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_10559, interleave = input_253_interleave_0, values = (hidden_states_207_cast_fp16, var_10561_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_10556_to_fp16 = const()[name = string("op_10556_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_10556_to_fp16, x = input_253_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; + tensor var_10575_to_fp16 = const()[name = string("op_10575_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620976704)))]; + tensor x_205_cast_fp16 = mul(x = normed_307_cast_fp16, y = var_10575_to_fp16)[name = string("x_205_cast_fp16")]; + tensor var_10587 = const()[name = string("op_10587"), val = tensor([0, 2, 1])]; + tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; + tensor var_10588_cast_fp16 = transpose(perm = var_10587, x = x_205_cast_fp16)[name = string("transpose_118")]; + tensor input_255_cast_fp16 = expand_dims(axes = input_255_axes_0, x = var_10588_cast_fp16)[name = string("input_255_cast_fp16")]; + string x_207_pad_type_0 = const()[name = string("x_207_pad_type_0"), val = string("valid")]; + tensor x_207_strides_0 = const()[name = string("x_207_strides_0"), val = tensor([1, 1])]; + tensor x_207_pad_0 = const()[name = string("x_207_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_207_dilations_0 = const()[name = string("x_207_dilations_0"), val = tensor([1, 1])]; + int32 x_207_groups_0 = const()[name = string("x_207_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620979072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(626951104))))[name = string("model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_207_cast_fp16 = conv(dilations = x_207_dilations_0, groups = x_207_groups_0, pad = x_207_pad_0, pad_type = x_207_pad_type_0, strides = x_207_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("x_207_cast_fp16")]; + string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; + tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; + tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; + int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627172352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633144384))))[name = string("model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_25_cast_fp16 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("b_25_cast_fp16")]; + string var_10613_mode_0 = const()[name = string("op_10613_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_10613_cast_fp16 = gelu(mode = var_10613_mode_0, x = x_207_cast_fp16)[name = string("op_10613_cast_fp16")]; + tensor input_257_cast_fp16 = mul(x = var_10613_cast_fp16, y = b_25_cast_fp16)[name = string("input_257_cast_fp16")]; + string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; + tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; + tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; + int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633365632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639337664))))[name = string("model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_25_cast_fp16 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_257_cast_fp16)[name = string("e_25_cast_fp16")]; + tensor var_10621_axes_0 = const()[name = string("op_10621_axes_0"), val = tensor([2])]; + tensor var_10621_cast_fp16 = squeeze(axes = var_10621_axes_0, x = e_25_cast_fp16)[name = string("op_10621_cast_fp16")]; + tensor var_10622 = const()[name = string("op_10622"), val = tensor([0, 2, 1])]; + int32 var_10633 = const()[name = string("op_10633"), val = int32(-1)]; + fp16 const_555_promoted_to_fp16 = const()[name = string("const_555_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_209_cast_fp16 = transpose(perm = var_10622, x = var_10621_cast_fp16)[name = string("transpose_117")]; + tensor var_10635_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_555_promoted_to_fp16)[name = string("op_10635_cast_fp16")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259_cast_fp16 = concat(axis = var_10633, interleave = input_259_interleave_0, values = (hidden_states_209_cast_fp16, var_10635_cast_fp16))[name = string("input_259_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_10630_to_fp16 = const()[name = string("op_10630_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_10630_to_fp16, x = input_259_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_311_cast_fp16 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311_cast_fp16")]; + tensor var_10649_to_fp16 = const()[name = string("op_10649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639374592)))]; + tensor hidden_states_211_cast_fp16 = mul(x = normed_311_cast_fp16, y = var_10649_to_fp16)[name = string("hidden_states_211_cast_fp16")]; + tensor hidden_states_213_cast_fp16 = add(x = hidden_states_207_cast_fp16, y = hidden_states_211_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; + int32 var_10703 = const()[name = string("op_10703"), val = int32(-1)]; + fp16 const_560_promoted_to_fp16 = const()[name = string("const_560_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10705_cast_fp16 = mul(x = hidden_states_213_cast_fp16, y = const_560_promoted_to_fp16)[name = string("op_10705_cast_fp16")]; + bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; + tensor input_261_cast_fp16 = concat(axis = var_10703, interleave = input_261_interleave_0, values = (hidden_states_213_cast_fp16, var_10705_cast_fp16))[name = string("input_261_cast_fp16")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_10700_to_fp16 = const()[name = string("op_10700_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_10700_to_fp16, x = input_261_cast_fp16)[name = string("normed_313_cast_fp16")]; + tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_315_cast_fp16 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315_cast_fp16")]; + tensor var_10719_to_fp16 = const()[name = string("op_10719_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639376960)))]; + tensor hidden_states_215_cast_fp16 = mul(x = normed_315_cast_fp16, y = var_10719_to_fp16)[name = string("hidden_states_215_cast_fp16")]; + tensor var_10730 = const()[name = string("op_10730"), val = tensor([0, 2, 1])]; + tensor var_10733_axes_0 = const()[name = string("op_10733_axes_0"), val = tensor([2])]; + tensor var_10731_cast_fp16 = transpose(perm = var_10730, x = hidden_states_215_cast_fp16)[name = string("transpose_116")]; + tensor var_10733_cast_fp16 = expand_dims(axes = var_10733_axes_0, x = var_10731_cast_fp16)[name = string("op_10733_cast_fp16")]; + string query_states_105_pad_type_0 = const()[name = string("query_states_105_pad_type_0"), val = string("valid")]; + tensor query_states_105_strides_0 = const()[name = string("query_states_105_strides_0"), val = tensor([1, 1])]; + tensor query_states_105_pad_0 = const()[name = string("query_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_105_dilations_0 = const()[name = string("query_states_105_dilations_0"), val = tensor([1, 1])]; + int32 query_states_105_groups_0 = const()[name = string("query_states_105_groups_0"), val = int32(1)]; + tensor query_states_105 = conv(dilations = query_states_105_dilations_0, groups = query_states_105_groups_0, pad = query_states_105_pad_0, pad_type = query_states_105_pad_type_0, strides = query_states_105_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_10733_cast_fp16)[name = string("query_states_105")]; + string key_states_131_pad_type_0 = const()[name = string("key_states_131_pad_type_0"), val = string("valid")]; + tensor key_states_131_strides_0 = const()[name = string("key_states_131_strides_0"), val = tensor([1, 1])]; + tensor key_states_131_pad_0 = const()[name = string("key_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_131_dilations_0 = const()[name = string("key_states_131_dilations_0"), val = tensor([1, 1])]; + int32 key_states_131_groups_0 = const()[name = string("key_states_131_groups_0"), val = int32(1)]; + tensor key_states_131 = conv(dilations = key_states_131_dilations_0, groups = key_states_131_groups_0, pad = key_states_131_pad_0, pad_type = key_states_131_pad_type_0, strides = key_states_131_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_10733_cast_fp16)[name = string("key_states_131")]; + string value_states_105_pad_type_0 = const()[name = string("value_states_105_pad_type_0"), val = string("valid")]; + tensor value_states_105_strides_0 = const()[name = string("value_states_105_strides_0"), val = tensor([1, 1])]; + tensor value_states_105_pad_0 = const()[name = string("value_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_105_dilations_0 = const()[name = string("value_states_105_dilations_0"), val = tensor([1, 1])]; + int32 value_states_105_groups_0 = const()[name = string("value_states_105_groups_0"), val = int32(1)]; + tensor value_states_105 = conv(dilations = value_states_105_dilations_0, groups = value_states_105_groups_0, pad = value_states_105_pad_0, pad_type = value_states_105_pad_type_0, strides = value_states_105_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_10733_cast_fp16)[name = string("value_states_105")]; + tensor var_10775 = const()[name = string("op_10775"), val = tensor([1, 4, 256, 64])]; + tensor var_10776 = reshape(shape = var_10775, x = query_states_105)[name = string("op_10776")]; + tensor var_10781 = const()[name = string("op_10781"), val = tensor([0, 1, 3, 2])]; + tensor var_10786 = const()[name = string("op_10786"), val = tensor([1, 1, 256, 64])]; + tensor var_10787 = reshape(shape = var_10786, x = key_states_131)[name = string("op_10787")]; + tensor var_10792 = const()[name = string("op_10792"), val = tensor([0, 1, 3, 2])]; + tensor var_10797 = const()[name = string("op_10797"), val = tensor([1, 1, 256, 64])]; + tensor var_10798 = reshape(shape = var_10797, x = value_states_105)[name = string("op_10798")]; + tensor var_10803 = const()[name = string("op_10803"), val = tensor([0, 1, 3, 2])]; + int32 var_10814 = const()[name = string("op_10814"), val = int32(-1)]; + fp16 const_565_promoted = const()[name = string("const_565_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_217 = transpose(perm = var_10781, x = var_10776)[name = string("transpose_115")]; + tensor var_10816 = mul(x = hidden_states_217, y = const_565_promoted)[name = string("op_10816")]; + bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; + tensor input_265 = concat(axis = var_10814, interleave = input_265_interleave_0, values = (hidden_states_217, var_10816))[name = string("input_265")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_10811_to_fp16 = const()[name = string("op_10811_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_10811_to_fp16, x = input_265)[name = string("normed_317_cast_fp16")]; + tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_319 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319")]; + tensor var_10830_to_fp16 = const()[name = string("op_10830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639379328)))]; + tensor q_27_cast_fp16 = mul(x = normed_319, y = var_10830_to_fp16)[name = string("q_27_cast_fp16")]; + int32 var_10841 = const()[name = string("op_10841"), val = int32(-1)]; + fp16 const_569_promoted = const()[name = string("const_569_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_219 = transpose(perm = var_10792, x = var_10787)[name = string("transpose_114")]; + tensor var_10843 = mul(x = hidden_states_219, y = const_569_promoted)[name = string("op_10843")]; + bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; + tensor input_267 = concat(axis = var_10841, interleave = input_267_interleave_0, values = (hidden_states_219, var_10843))[name = string("input_267")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_10838_to_fp16 = const()[name = string("op_10838_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_10838_to_fp16, x = input_267)[name = string("normed_321_cast_fp16")]; + tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_323 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323")]; + tensor var_10857_to_fp16 = const()[name = string("op_10857_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639379904)))]; + tensor k_27_cast_fp16 = mul(x = normed_323, y = var_10857_to_fp16)[name = string("k_27_cast_fp16")]; + tensor var_10871_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_5)[name = string("op_10871_cast_fp16")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; + fp16 const_575_promoted_to_fp16 = const()[name = string("const_575_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10892_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_575_promoted_to_fp16)[name = string("op_10892_cast_fp16")]; + int32 var_10894 = const()[name = string("op_10894"), val = int32(-1)]; + bool var_10895_interleave_0 = const()[name = string("op_10895_interleave_0"), val = bool(false)]; + tensor var_10895_cast_fp16 = concat(axis = var_10894, interleave = var_10895_interleave_0, values = (var_10892_cast_fp16, x1_53_cast_fp16))[name = string("op_10895_cast_fp16")]; + tensor var_10896_cast_fp16 = mul(x = var_10895_cast_fp16, y = sin_5)[name = string("op_10896_cast_fp16")]; + tensor query_states_107_cast_fp16 = add(x = var_10871_cast_fp16, y = var_10896_cast_fp16)[name = string("query_states_107_cast_fp16")]; + tensor var_10899_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_5)[name = string("op_10899_cast_fp16")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; + fp16 const_578_promoted_to_fp16 = const()[name = string("const_578_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10920_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_578_promoted_to_fp16)[name = string("op_10920_cast_fp16")]; + int32 var_10922 = const()[name = string("op_10922"), val = int32(-1)]; + bool var_10923_interleave_0 = const()[name = string("op_10923_interleave_0"), val = bool(false)]; + tensor var_10923_cast_fp16 = concat(axis = var_10922, interleave = var_10923_interleave_0, values = (var_10920_cast_fp16, x1_55_cast_fp16))[name = string("op_10923_cast_fp16")]; + tensor var_10924_cast_fp16 = mul(x = var_10923_cast_fp16, y = sin_5)[name = string("op_10924_cast_fp16")]; + tensor key_states_133_cast_fp16 = add(x = var_10899_cast_fp16, y = var_10924_cast_fp16)[name = string("key_states_133_cast_fp16")]; + tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([11])]; + tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; + tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; + tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([12])]; + int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; + bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; + tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_236")]; + tensor concat_237_values1_0 = const()[name = string("concat_237_values1_0"), val = tensor([0])]; + tensor concat_237_values3_0 = const()[name = string("concat_237_values3_0"), val = tensor([0])]; + int32 concat_237_axis_0 = const()[name = string("concat_237_axis_0"), val = int32(0)]; + bool concat_237_interleave_0 = const()[name = string("concat_237_interleave_0"), val = bool(false)]; + tensor concat_237 = concat(axis = concat_237_axis_0, interleave = concat_237_interleave_0, values = (expand_dims_160, concat_237_values1_0, end_pos_1, concat_237_values3_0))[name = string("concat_237")]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_236, begin_mask = model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0, end = concat_237, end_mask = model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_23_stride_0, update = key_states_133_cast_fp16, x = coreml_update_state_77)[name = string("model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_130_write_state")]; + tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_130")]; + tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([33])]; + tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; + tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; + tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([34])]; + int32 concat_240_axis_0 = const()[name = string("concat_240_axis_0"), val = int32(0)]; + bool concat_240_interleave_0 = const()[name = string("concat_240_interleave_0"), val = bool(false)]; + tensor concat_240 = concat(axis = concat_240_axis_0, interleave = concat_240_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_240")]; + tensor concat_241_values1_0 = const()[name = string("concat_241_values1_0"), val = tensor([0])]; + tensor concat_241_values3_0 = const()[name = string("concat_241_values3_0"), val = tensor([0])]; + int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; + bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; + tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (expand_dims_166, concat_241_values1_0, end_pos_1, concat_241_values3_0))[name = string("concat_241")]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_107 = transpose(perm = var_10803, x = var_10798)[name = string("transpose_113")]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_240, begin_mask = model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0, end = concat_241, end_mask = model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_24_stride_0, update = value_states_107, x = coreml_update_state_78)[name = string("model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_131_write_state")]; + tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_131")]; + tensor var_11023_begin_0 = const()[name = string("op_11023_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor var_11023_end_0 = const()[name = string("op_11023_end_0"), val = tensor([12, 1, 512, 256])]; + tensor var_11023_end_mask_0 = const()[name = string("op_11023_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11023_cast_fp16 = slice_by_index(begin = var_11023_begin_0, end = var_11023_end_0, end_mask = var_11023_end_mask_0, x = coreml_update_state_79)[name = string("op_11023_cast_fp16")]; + tensor var_11030_begin_0 = const()[name = string("op_11030_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_11030_end_0 = const()[name = string("op_11030_end_0"), val = tensor([34, 1, 512, 256])]; + tensor var_11030_end_mask_0 = const()[name = string("op_11030_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11030_cast_fp16 = slice_by_index(begin = var_11030_begin_0, end = var_11030_end_0, end_mask = var_11030_end_mask_0, x = coreml_update_state_79)[name = string("op_11030_cast_fp16")]; + tensor var_11069 = const()[name = string("op_11069"), val = tensor([1, 4, 1, 1])]; + tensor x_213_cast_fp16 = tile(reps = var_11069, x = var_11023_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_11089 = const()[name = string("op_11089"), val = tensor([1, 4, 1, 1])]; + tensor x_219_cast_fp16 = tile(reps = var_11089, x = var_11030_cast_fp16)[name = string("x_219_cast_fp16")]; + bool var_11116_transpose_x_0 = const()[name = string("op_11116_transpose_x_0"), val = bool(false)]; + bool var_11116_transpose_y_0 = const()[name = string("op_11116_transpose_y_0"), val = bool(true)]; + tensor var_11116 = matmul(transpose_x = var_11116_transpose_x_0, transpose_y = var_11116_transpose_y_0, x = query_states_107_cast_fp16, y = x_213_cast_fp16)[name = string("op_11116")]; + fp16 var_11117_to_fp16 = const()[name = string("op_11117_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_53_cast_fp16 = mul(x = var_11116, y = var_11117_to_fp16)[name = string("attn_weights_53_cast_fp16")]; + tensor attn_weights_55_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = mask_slice_1)[name = string("attn_weights_55_cast_fp16")]; + int32 var_11152 = const()[name = string("op_11152"), val = int32(-1)]; + tensor var_11154_cast_fp16 = softmax(axis = var_11152, x = attn_weights_55_cast_fp16)[name = string("op_11154_cast_fp16")]; + tensor concat_246 = const()[name = string("concat_246"), val = tensor([4, 64, 512])]; + tensor reshape_39_cast_fp16 = reshape(shape = concat_246, x = var_11154_cast_fp16)[name = string("reshape_39_cast_fp16")]; + tensor concat_247 = const()[name = string("concat_247"), val = tensor([4, 512, 256])]; + tensor reshape_40_cast_fp16 = reshape(shape = concat_247, x = x_219_cast_fp16)[name = string("reshape_40_cast_fp16")]; + bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; + bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; + tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; + tensor concat_251 = const()[name = string("concat_251"), val = tensor([1, 4, 64, 256])]; + tensor reshape_41_cast_fp16 = reshape(shape = concat_251, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; + tensor var_11166_perm_0 = const()[name = string("op_11166_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11185 = const()[name = string("op_11185"), val = tensor([1, 64, 1024])]; + tensor var_11166_cast_fp16 = transpose(perm = var_11166_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_112")]; + tensor attn_output_135_cast_fp16 = reshape(shape = var_11185, x = var_11166_cast_fp16)[name = string("attn_output_135_cast_fp16")]; + tensor var_11190 = const()[name = string("op_11190"), val = tensor([0, 2, 1])]; + string var_11206_pad_type_0 = const()[name = string("op_11206_pad_type_0"), val = string("valid")]; + int32 var_11206_groups_0 = const()[name = string("op_11206_groups_0"), val = int32(1)]; + tensor var_11206_strides_0 = const()[name = string("op_11206_strides_0"), val = tensor([1])]; + tensor var_11206_pad_0 = const()[name = string("op_11206_pad_0"), val = tensor([0, 0])]; + tensor var_11206_dilations_0 = const()[name = string("op_11206_dilations_0"), val = tensor([1])]; + tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639380480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640265280))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11191_cast_fp16 = transpose(perm = var_11190, x = attn_output_135_cast_fp16)[name = string("transpose_111")]; + tensor var_11206_cast_fp16 = conv(dilations = var_11206_dilations_0, groups = var_11206_groups_0, pad = var_11206_pad_0, pad_type = var_11206_pad_type_0, strides = var_11206_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_11191_cast_fp16)[name = string("op_11206_cast_fp16")]; + tensor var_11210 = const()[name = string("op_11210"), val = tensor([0, 2, 1])]; + int32 var_11221 = const()[name = string("op_11221"), val = int32(-1)]; + fp16 const_590_promoted_to_fp16 = const()[name = string("const_590_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_221_cast_fp16 = transpose(perm = var_11210, x = var_11206_cast_fp16)[name = string("transpose_110")]; + tensor var_11223_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_590_promoted_to_fp16)[name = string("op_11223_cast_fp16")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271_cast_fp16 = concat(axis = var_11221, interleave = input_271_interleave_0, values = (hidden_states_221_cast_fp16, var_11223_cast_fp16))[name = string("input_271_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_11218_to_fp16 = const()[name = string("op_11218_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_11218_to_fp16, x = input_271_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_327_cast_fp16 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327_cast_fp16")]; + tensor var_11237_to_fp16 = const()[name = string("op_11237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640302208)))]; + tensor attn_output_139_cast_fp16 = mul(x = normed_327_cast_fp16, y = var_11237_to_fp16)[name = string("attn_output_139_cast_fp16")]; + tensor hidden_states_223_cast_fp16 = add(x = hidden_states_213_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_223_cast_fp16")]; + int32 var_11250 = const()[name = string("op_11250"), val = int32(-1)]; + fp16 const_594_promoted_to_fp16 = const()[name = string("const_594_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11252_cast_fp16 = mul(x = hidden_states_223_cast_fp16, y = const_594_promoted_to_fp16)[name = string("op_11252_cast_fp16")]; + bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; + tensor input_273_cast_fp16 = concat(axis = var_11250, interleave = input_273_interleave_0, values = (hidden_states_223_cast_fp16, var_11252_cast_fp16))[name = string("input_273_cast_fp16")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_11247_to_fp16 = const()[name = string("op_11247_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_11247_to_fp16, x = input_273_cast_fp16)[name = string("normed_329_cast_fp16")]; + tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_331_cast_fp16 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331_cast_fp16")]; + tensor var_11266_to_fp16 = const()[name = string("op_11266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640304576)))]; + tensor x_221_cast_fp16 = mul(x = normed_331_cast_fp16, y = var_11266_to_fp16)[name = string("x_221_cast_fp16")]; + tensor var_11278 = const()[name = string("op_11278"), val = tensor([0, 2, 1])]; + tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; + tensor var_11279_cast_fp16 = transpose(perm = var_11278, x = x_221_cast_fp16)[name = string("transpose_109")]; + tensor input_275_cast_fp16 = expand_dims(axes = input_275_axes_0, x = var_11279_cast_fp16)[name = string("input_275_cast_fp16")]; + string x_223_pad_type_0 = const()[name = string("x_223_pad_type_0"), val = string("valid")]; + tensor x_223_strides_0 = const()[name = string("x_223_strides_0"), val = tensor([1, 1])]; + tensor x_223_pad_0 = const()[name = string("x_223_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_223_dilations_0 = const()[name = string("x_223_dilations_0"), val = tensor([1, 1])]; + int32 x_223_groups_0 = const()[name = string("x_223_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640306944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646278976))))[name = string("model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_223_cast_fp16 = conv(dilations = x_223_dilations_0, groups = x_223_groups_0, pad = x_223_pad_0, pad_type = x_223_pad_type_0, strides = x_223_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("x_223_cast_fp16")]; + string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; + tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; + tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; + int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646500224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652472256))))[name = string("model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_27_cast_fp16 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("b_27_cast_fp16")]; + string var_11304_mode_0 = const()[name = string("op_11304_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_11304_cast_fp16 = gelu(mode = var_11304_mode_0, x = x_223_cast_fp16)[name = string("op_11304_cast_fp16")]; + tensor input_277_cast_fp16 = mul(x = var_11304_cast_fp16, y = b_27_cast_fp16)[name = string("input_277_cast_fp16")]; + string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; + tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; + tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; + int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652693504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658665536))))[name = string("model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_27_cast_fp16 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_277_cast_fp16)[name = string("e_27_cast_fp16")]; + tensor var_11312_axes_0 = const()[name = string("op_11312_axes_0"), val = tensor([2])]; + tensor var_11312_cast_fp16 = squeeze(axes = var_11312_axes_0, x = e_27_cast_fp16)[name = string("op_11312_cast_fp16")]; + tensor var_11313 = const()[name = string("op_11313"), val = tensor([0, 2, 1])]; + int32 var_11324 = const()[name = string("op_11324"), val = int32(-1)]; + fp16 const_598_promoted_to_fp16 = const()[name = string("const_598_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_225_cast_fp16 = transpose(perm = var_11313, x = var_11312_cast_fp16)[name = string("transpose_108")]; + tensor var_11326_cast_fp16 = mul(x = hidden_states_225_cast_fp16, y = const_598_promoted_to_fp16)[name = string("op_11326_cast_fp16")]; + bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; + tensor input_279_cast_fp16 = concat(axis = var_11324, interleave = input_279_interleave_0, values = (hidden_states_225_cast_fp16, var_11326_cast_fp16))[name = string("input_279_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_11321_to_fp16 = const()[name = string("op_11321_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_11321_to_fp16, x = input_279_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; + tensor var_11340_to_fp16 = const()[name = string("op_11340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658702464)))]; + tensor hidden_states_227_cast_fp16 = mul(x = normed_335_cast_fp16, y = var_11340_to_fp16)[name = string("hidden_states_227_cast_fp16")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_223_cast_fp16, y = hidden_states_227_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + int32 var_11394 = const()[name = string("op_11394"), val = int32(-1)]; + fp16 const_603_promoted_to_fp16 = const()[name = string("const_603_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11396_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = const_603_promoted_to_fp16)[name = string("op_11396_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_11394, interleave = input_281_interleave_0, values = (hidden_states_229_cast_fp16, var_11396_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_11391_to_fp16 = const()[name = string("op_11391_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_11391_to_fp16, x = input_281_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; + tensor var_11410_to_fp16 = const()[name = string("op_11410_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658704832)))]; + tensor hidden_states_231_cast_fp16 = mul(x = normed_339_cast_fp16, y = var_11410_to_fp16)[name = string("hidden_states_231_cast_fp16")]; + tensor var_11421 = const()[name = string("op_11421"), val = tensor([0, 2, 1])]; + tensor var_11424_axes_0 = const()[name = string("op_11424_axes_0"), val = tensor([2])]; + tensor var_11422_cast_fp16 = transpose(perm = var_11421, x = hidden_states_231_cast_fp16)[name = string("transpose_107")]; + tensor var_11424_cast_fp16 = expand_dims(axes = var_11424_axes_0, x = var_11422_cast_fp16)[name = string("op_11424_cast_fp16")]; + string query_states_113_pad_type_0 = const()[name = string("query_states_113_pad_type_0"), val = string("valid")]; + tensor query_states_113_strides_0 = const()[name = string("query_states_113_strides_0"), val = tensor([1, 1])]; + tensor query_states_113_pad_0 = const()[name = string("query_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_113_dilations_0 = const()[name = string("query_states_113_dilations_0"), val = tensor([1, 1])]; + int32 query_states_113_groups_0 = const()[name = string("query_states_113_groups_0"), val = int32(1)]; + tensor query_states_113 = conv(dilations = query_states_113_dilations_0, groups = query_states_113_groups_0, pad = query_states_113_pad_0, pad_type = query_states_113_pad_type_0, strides = query_states_113_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_11424_cast_fp16)[name = string("query_states_113")]; + string key_states_141_pad_type_0 = const()[name = string("key_states_141_pad_type_0"), val = string("valid")]; + tensor key_states_141_strides_0 = const()[name = string("key_states_141_strides_0"), val = tensor([1, 1])]; + tensor key_states_141_pad_0 = const()[name = string("key_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_141_dilations_0 = const()[name = string("key_states_141_dilations_0"), val = tensor([1, 1])]; + int32 key_states_141_groups_0 = const()[name = string("key_states_141_groups_0"), val = int32(1)]; + tensor key_states_141 = conv(dilations = key_states_141_dilations_0, groups = key_states_141_groups_0, pad = key_states_141_pad_0, pad_type = key_states_141_pad_type_0, strides = key_states_141_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_11424_cast_fp16)[name = string("key_states_141")]; + string value_states_113_pad_type_0 = const()[name = string("value_states_113_pad_type_0"), val = string("valid")]; + tensor value_states_113_strides_0 = const()[name = string("value_states_113_strides_0"), val = tensor([1, 1])]; + tensor value_states_113_pad_0 = const()[name = string("value_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_113_dilations_0 = const()[name = string("value_states_113_dilations_0"), val = tensor([1, 1])]; + int32 value_states_113_groups_0 = const()[name = string("value_states_113_groups_0"), val = int32(1)]; + tensor value_states_113 = conv(dilations = value_states_113_dilations_0, groups = value_states_113_groups_0, pad = value_states_113_pad_0, pad_type = value_states_113_pad_type_0, strides = value_states_113_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_11424_cast_fp16)[name = string("value_states_113")]; + tensor var_11466 = const()[name = string("op_11466"), val = tensor([1, 4, 256, 64])]; + tensor var_11467 = reshape(shape = var_11466, x = query_states_113)[name = string("op_11467")]; + tensor var_11472 = const()[name = string("op_11472"), val = tensor([0, 1, 3, 2])]; + tensor var_11477 = const()[name = string("op_11477"), val = tensor([1, 1, 256, 64])]; + tensor var_11478 = reshape(shape = var_11477, x = key_states_141)[name = string("op_11478")]; + tensor var_11483 = const()[name = string("op_11483"), val = tensor([0, 1, 3, 2])]; + tensor var_11488 = const()[name = string("op_11488"), val = tensor([1, 1, 256, 64])]; + tensor var_11489 = reshape(shape = var_11488, x = value_states_113)[name = string("op_11489")]; + tensor var_11494 = const()[name = string("op_11494"), val = tensor([0, 1, 3, 2])]; + int32 var_11505 = const()[name = string("op_11505"), val = int32(-1)]; + fp16 const_608_promoted = const()[name = string("const_608_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_233 = transpose(perm = var_11472, x = var_11467)[name = string("transpose_106")]; + tensor var_11507 = mul(x = hidden_states_233, y = const_608_promoted)[name = string("op_11507")]; + bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; + tensor input_285 = concat(axis = var_11505, interleave = input_285_interleave_0, values = (hidden_states_233, var_11507))[name = string("input_285")]; + tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; + fp16 var_11502_to_fp16 = const()[name = string("op_11502_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_11502_to_fp16, x = input_285)[name = string("normed_341_cast_fp16")]; + tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; + tensor var_11521_to_fp16 = const()[name = string("op_11521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658707200)))]; + tensor q_29_cast_fp16 = mul(x = normed_343, y = var_11521_to_fp16)[name = string("q_29_cast_fp16")]; + int32 var_11532 = const()[name = string("op_11532"), val = int32(-1)]; + fp16 const_612_promoted = const()[name = string("const_612_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_235 = transpose(perm = var_11483, x = var_11478)[name = string("transpose_105")]; + tensor var_11534 = mul(x = hidden_states_235, y = const_612_promoted)[name = string("op_11534")]; + bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; + tensor input_287 = concat(axis = var_11532, interleave = input_287_interleave_0, values = (hidden_states_235, var_11534))[name = string("input_287")]; + tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; + fp16 var_11529_to_fp16 = const()[name = string("op_11529_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_11529_to_fp16, x = input_287)[name = string("normed_345_cast_fp16")]; + tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; + tensor var_11548_to_fp16 = const()[name = string("op_11548_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658707776)))]; + tensor k_29_cast_fp16 = mul(x = normed_347, y = var_11548_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_11562_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_5)[name = string("op_11562_cast_fp16")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; + fp16 const_618_promoted_to_fp16 = const()[name = string("const_618_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11583_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_618_promoted_to_fp16)[name = string("op_11583_cast_fp16")]; + int32 var_11585 = const()[name = string("op_11585"), val = int32(-1)]; + bool var_11586_interleave_0 = const()[name = string("op_11586_interleave_0"), val = bool(false)]; + tensor var_11586_cast_fp16 = concat(axis = var_11585, interleave = var_11586_interleave_0, values = (var_11583_cast_fp16, x1_57_cast_fp16))[name = string("op_11586_cast_fp16")]; + tensor var_11587_cast_fp16 = mul(x = var_11586_cast_fp16, y = sin_5)[name = string("op_11587_cast_fp16")]; + tensor query_states_115_cast_fp16 = add(x = var_11562_cast_fp16, y = var_11587_cast_fp16)[name = string("query_states_115_cast_fp16")]; + tensor var_11590_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_5)[name = string("op_11590_cast_fp16")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; + fp16 const_621_promoted_to_fp16 = const()[name = string("const_621_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11611_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_621_promoted_to_fp16)[name = string("op_11611_cast_fp16")]; + int32 var_11613 = const()[name = string("op_11613"), val = int32(-1)]; + bool var_11614_interleave_0 = const()[name = string("op_11614_interleave_0"), val = bool(false)]; + tensor var_11614_cast_fp16 = concat(axis = var_11613, interleave = var_11614_interleave_0, values = (var_11611_cast_fp16, x1_59_cast_fp16))[name = string("op_11614_cast_fp16")]; + tensor var_11615_cast_fp16 = mul(x = var_11614_cast_fp16, y = sin_5)[name = string("op_11615_cast_fp16")]; + tensor key_states_143_cast_fp16 = add(x = var_11590_cast_fp16, y = var_11615_cast_fp16)[name = string("key_states_143_cast_fp16")]; + tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([12])]; + tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; + tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; + tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([13])]; + int32 concat_254_axis_0 = const()[name = string("concat_254_axis_0"), val = int32(0)]; + bool concat_254_interleave_0 = const()[name = string("concat_254_interleave_0"), val = bool(false)]; + tensor concat_254 = concat(axis = concat_254_axis_0, interleave = concat_254_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_254")]; + tensor concat_255_values1_0 = const()[name = string("concat_255_values1_0"), val = tensor([0])]; + tensor concat_255_values3_0 = const()[name = string("concat_255_values3_0"), val = tensor([0])]; + int32 concat_255_axis_0 = const()[name = string("concat_255_axis_0"), val = int32(0)]; + bool concat_255_interleave_0 = const()[name = string("concat_255_interleave_0"), val = bool(false)]; + tensor concat_255 = concat(axis = concat_255_axis_0, interleave = concat_255_interleave_0, values = (expand_dims_172, concat_255_values1_0, end_pos_1, concat_255_values3_0))[name = string("concat_255")]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_254, begin_mask = model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0, end = concat_255, end_mask = model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_25_stride_0, update = key_states_143_cast_fp16, x = coreml_update_state_79)[name = string("model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_132_write_state")]; + tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_132")]; + tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([34])]; + tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; + tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([35])]; + int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; + bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; + tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_258")]; + tensor concat_259_values1_0 = const()[name = string("concat_259_values1_0"), val = tensor([0])]; + tensor concat_259_values3_0 = const()[name = string("concat_259_values3_0"), val = tensor([0])]; + int32 concat_259_axis_0 = const()[name = string("concat_259_axis_0"), val = int32(0)]; + bool concat_259_interleave_0 = const()[name = string("concat_259_interleave_0"), val = bool(false)]; + tensor concat_259 = concat(axis = concat_259_axis_0, interleave = concat_259_interleave_0, values = (expand_dims_178, concat_259_values1_0, end_pos_1, concat_259_values3_0))[name = string("concat_259")]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_115 = transpose(perm = var_11494, x = var_11489)[name = string("transpose_104")]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_258, begin_mask = model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0, end = concat_259, end_mask = model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_26_stride_0, update = value_states_115, x = coreml_update_state_80)[name = string("model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_133_write_state")]; + tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_133")]; + tensor var_11714_begin_0 = const()[name = string("op_11714_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor var_11714_end_0 = const()[name = string("op_11714_end_0"), val = tensor([13, 1, 512, 256])]; + tensor var_11714_end_mask_0 = const()[name = string("op_11714_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11714_cast_fp16 = slice_by_index(begin = var_11714_begin_0, end = var_11714_end_0, end_mask = var_11714_end_mask_0, x = coreml_update_state_81)[name = string("op_11714_cast_fp16")]; + tensor var_11721_begin_0 = const()[name = string("op_11721_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_11721_end_0 = const()[name = string("op_11721_end_0"), val = tensor([35, 1, 512, 256])]; + tensor var_11721_end_mask_0 = const()[name = string("op_11721_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11721_cast_fp16 = slice_by_index(begin = var_11721_begin_0, end = var_11721_end_0, end_mask = var_11721_end_mask_0, x = coreml_update_state_81)[name = string("op_11721_cast_fp16")]; + tensor var_11760 = const()[name = string("op_11760"), val = tensor([1, 4, 1, 1])]; + tensor x_229_cast_fp16 = tile(reps = var_11760, x = var_11714_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_11780 = const()[name = string("op_11780"), val = tensor([1, 4, 1, 1])]; + tensor x_235_cast_fp16 = tile(reps = var_11780, x = var_11721_cast_fp16)[name = string("x_235_cast_fp16")]; + bool var_11807_transpose_x_0 = const()[name = string("op_11807_transpose_x_0"), val = bool(false)]; + bool var_11807_transpose_y_0 = const()[name = string("op_11807_transpose_y_0"), val = bool(true)]; + tensor var_11807 = matmul(transpose_x = var_11807_transpose_x_0, transpose_y = var_11807_transpose_y_0, x = query_states_115_cast_fp16, y = x_229_cast_fp16)[name = string("op_11807")]; + fp16 var_11808_to_fp16 = const()[name = string("op_11808_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_57_cast_fp16 = mul(x = var_11807, y = var_11808_to_fp16)[name = string("attn_weights_57_cast_fp16")]; + tensor attn_weights_59_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = mask_slice_1)[name = string("attn_weights_59_cast_fp16")]; + int32 var_11843 = const()[name = string("op_11843"), val = int32(-1)]; + tensor var_11845_cast_fp16 = softmax(axis = var_11843, x = attn_weights_59_cast_fp16)[name = string("op_11845_cast_fp16")]; + tensor concat_264 = const()[name = string("concat_264"), val = tensor([4, 64, 512])]; + tensor reshape_42_cast_fp16 = reshape(shape = concat_264, x = var_11845_cast_fp16)[name = string("reshape_42_cast_fp16")]; + tensor concat_265 = const()[name = string("concat_265"), val = tensor([4, 512, 256])]; + tensor reshape_43_cast_fp16 = reshape(shape = concat_265, x = x_235_cast_fp16)[name = string("reshape_43_cast_fp16")]; + bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; + bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; + tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; + tensor concat_269 = const()[name = string("concat_269"), val = tensor([1, 4, 64, 256])]; + tensor reshape_44_cast_fp16 = reshape(shape = concat_269, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; + tensor var_11857_perm_0 = const()[name = string("op_11857_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11876 = const()[name = string("op_11876"), val = tensor([1, 64, 1024])]; + tensor var_11857_cast_fp16 = transpose(perm = var_11857_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_103")]; + tensor attn_output_145_cast_fp16 = reshape(shape = var_11876, x = var_11857_cast_fp16)[name = string("attn_output_145_cast_fp16")]; + tensor var_11881 = const()[name = string("op_11881"), val = tensor([0, 2, 1])]; + string var_11897_pad_type_0 = const()[name = string("op_11897_pad_type_0"), val = string("valid")]; + int32 var_11897_groups_0 = const()[name = string("op_11897_groups_0"), val = int32(1)]; + tensor var_11897_strides_0 = const()[name = string("op_11897_strides_0"), val = tensor([1])]; + tensor var_11897_pad_0 = const()[name = string("op_11897_pad_0"), val = tensor([0, 0])]; + tensor var_11897_dilations_0 = const()[name = string("op_11897_dilations_0"), val = tensor([1])]; + tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658708352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659593152))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11882_cast_fp16 = transpose(perm = var_11881, x = attn_output_145_cast_fp16)[name = string("transpose_102")]; + tensor var_11897_cast_fp16 = conv(dilations = var_11897_dilations_0, groups = var_11897_groups_0, pad = var_11897_pad_0, pad_type = var_11897_pad_type_0, strides = var_11897_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_11882_cast_fp16)[name = string("op_11897_cast_fp16")]; + tensor var_11901 = const()[name = string("op_11901"), val = tensor([0, 2, 1])]; + int32 var_11912 = const()[name = string("op_11912"), val = int32(-1)]; + fp16 const_633_promoted_to_fp16 = const()[name = string("const_633_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_237_cast_fp16 = transpose(perm = var_11901, x = var_11897_cast_fp16)[name = string("transpose_101")]; + tensor var_11914_cast_fp16 = mul(x = hidden_states_237_cast_fp16, y = const_633_promoted_to_fp16)[name = string("op_11914_cast_fp16")]; + bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; + tensor input_291_cast_fp16 = concat(axis = var_11912, interleave = input_291_interleave_0, values = (hidden_states_237_cast_fp16, var_11914_cast_fp16))[name = string("input_291_cast_fp16")]; + tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; + fp16 var_11909_to_fp16 = const()[name = string("op_11909_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_11909_to_fp16, x = input_291_cast_fp16)[name = string("normed_349_cast_fp16")]; + tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; + tensor var_11928_to_fp16 = const()[name = string("op_11928_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659630080)))]; + tensor attn_output_149_cast_fp16 = mul(x = normed_351_cast_fp16, y = var_11928_to_fp16)[name = string("attn_output_149_cast_fp16")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + int32 var_11941 = const()[name = string("op_11941"), val = int32(-1)]; + fp16 const_637_promoted_to_fp16 = const()[name = string("const_637_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11943_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = const_637_promoted_to_fp16)[name = string("op_11943_cast_fp16")]; + bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; + tensor input_293_cast_fp16 = concat(axis = var_11941, interleave = input_293_interleave_0, values = (hidden_states_239_cast_fp16, var_11943_cast_fp16))[name = string("input_293_cast_fp16")]; + tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; + fp16 var_11938_to_fp16 = const()[name = string("op_11938_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_11938_to_fp16, x = input_293_cast_fp16)[name = string("normed_353_cast_fp16")]; + tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; + tensor var_11957_to_fp16 = const()[name = string("op_11957_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659632448)))]; + tensor x_237_cast_fp16 = mul(x = normed_355_cast_fp16, y = var_11957_to_fp16)[name = string("x_237_cast_fp16")]; + tensor var_11969 = const()[name = string("op_11969"), val = tensor([0, 2, 1])]; + tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; + tensor var_11970_cast_fp16 = transpose(perm = var_11969, x = x_237_cast_fp16)[name = string("transpose_100")]; + tensor input_295_cast_fp16 = expand_dims(axes = input_295_axes_0, x = var_11970_cast_fp16)[name = string("input_295_cast_fp16")]; + string x_239_pad_type_0 = const()[name = string("x_239_pad_type_0"), val = string("valid")]; + tensor x_239_strides_0 = const()[name = string("x_239_strides_0"), val = tensor([1, 1])]; + tensor x_239_pad_0 = const()[name = string("x_239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_239_dilations_0 = const()[name = string("x_239_dilations_0"), val = tensor([1, 1])]; + int32 x_239_groups_0 = const()[name = string("x_239_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659634816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665606848))))[name = string("model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_239_cast_fp16 = conv(dilations = x_239_dilations_0, groups = x_239_groups_0, pad = x_239_pad_0, pad_type = x_239_pad_type_0, strides = x_239_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("x_239_cast_fp16")]; + string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; + tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; + tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; + int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665828096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671800128))))[name = string("model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_29_cast_fp16 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("b_29_cast_fp16")]; + string var_11995_mode_0 = const()[name = string("op_11995_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_11995_cast_fp16 = gelu(mode = var_11995_mode_0, x = x_239_cast_fp16)[name = string("op_11995_cast_fp16")]; + tensor input_297_cast_fp16 = mul(x = var_11995_cast_fp16, y = b_29_cast_fp16)[name = string("input_297_cast_fp16")]; + string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; + tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; + tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; + int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(672021376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677993408))))[name = string("model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_29_cast_fp16 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("e_29_cast_fp16")]; + tensor var_12003_axes_0 = const()[name = string("op_12003_axes_0"), val = tensor([2])]; + tensor var_12003_cast_fp16 = squeeze(axes = var_12003_axes_0, x = e_29_cast_fp16)[name = string("op_12003_cast_fp16")]; + tensor var_12004 = const()[name = string("op_12004"), val = tensor([0, 2, 1])]; + int32 var_12015 = const()[name = string("op_12015"), val = int32(-1)]; + fp16 const_641_promoted_to_fp16 = const()[name = string("const_641_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_241_cast_fp16 = transpose(perm = var_12004, x = var_12003_cast_fp16)[name = string("transpose_99")]; + tensor var_12017_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_641_promoted_to_fp16)[name = string("op_12017_cast_fp16")]; + bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; + tensor input_299_cast_fp16 = concat(axis = var_12015, interleave = input_299_interleave_0, values = (hidden_states_241_cast_fp16, var_12017_cast_fp16))[name = string("input_299_cast_fp16")]; + tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; + fp16 var_12012_to_fp16 = const()[name = string("op_12012_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_12012_to_fp16, x = input_299_cast_fp16)[name = string("normed_357_cast_fp16")]; + tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_359_cast_fp16 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359_cast_fp16")]; + tensor var_12031_to_fp16 = const()[name = string("op_12031_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678030336)))]; + tensor hidden_states_243_cast_fp16 = mul(x = normed_359_cast_fp16, y = var_12031_to_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor hidden_states_245_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = hidden_states_243_cast_fp16)[name = string("hidden_states_245_cast_fp16")]; + int32 var_12085 = const()[name = string("op_12085"), val = int32(-1)]; + fp16 const_646_promoted_to_fp16 = const()[name = string("const_646_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12087_cast_fp16 = mul(x = hidden_states_245_cast_fp16, y = const_646_promoted_to_fp16)[name = string("op_12087_cast_fp16")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301_cast_fp16 = concat(axis = var_12085, interleave = input_301_interleave_0, values = (hidden_states_245_cast_fp16, var_12087_cast_fp16))[name = string("input_301_cast_fp16")]; + tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; + fp16 var_12082_to_fp16 = const()[name = string("op_12082_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_12082_to_fp16, x = input_301_cast_fp16)[name = string("normed_361_cast_fp16")]; + tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_363_cast_fp16 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363_cast_fp16")]; + tensor var_12101_to_fp16 = const()[name = string("op_12101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678032704)))]; + tensor hidden_states_247_cast_fp16 = mul(x = normed_363_cast_fp16, y = var_12101_to_fp16)[name = string("hidden_states_247_cast_fp16")]; + tensor var_12112 = const()[name = string("op_12112"), val = tensor([0, 2, 1])]; + tensor var_12115_axes_0 = const()[name = string("op_12115_axes_0"), val = tensor([2])]; + tensor var_12113_cast_fp16 = transpose(perm = var_12112, x = hidden_states_247_cast_fp16)[name = string("transpose_98")]; + tensor var_12115_cast_fp16 = expand_dims(axes = var_12115_axes_0, x = var_12113_cast_fp16)[name = string("op_12115_cast_fp16")]; + string query_states_121_pad_type_0 = const()[name = string("query_states_121_pad_type_0"), val = string("valid")]; + tensor query_states_121_strides_0 = const()[name = string("query_states_121_strides_0"), val = tensor([1, 1])]; + tensor query_states_121_pad_0 = const()[name = string("query_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_121_dilations_0 = const()[name = string("query_states_121_dilations_0"), val = tensor([1, 1])]; + int32 query_states_121_groups_0 = const()[name = string("query_states_121_groups_0"), val = int32(1)]; + tensor query_states_121 = conv(dilations = query_states_121_dilations_0, groups = query_states_121_groups_0, pad = query_states_121_pad_0, pad_type = query_states_121_pad_type_0, strides = query_states_121_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_12115_cast_fp16)[name = string("query_states_121")]; + string key_states_151_pad_type_0 = const()[name = string("key_states_151_pad_type_0"), val = string("valid")]; + tensor key_states_151_strides_0 = const()[name = string("key_states_151_strides_0"), val = tensor([1, 1])]; + tensor key_states_151_pad_0 = const()[name = string("key_states_151_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_151_dilations_0 = const()[name = string("key_states_151_dilations_0"), val = tensor([1, 1])]; + int32 key_states_151_groups_0 = const()[name = string("key_states_151_groups_0"), val = int32(1)]; + tensor key_states_151 = conv(dilations = key_states_151_dilations_0, groups = key_states_151_groups_0, pad = key_states_151_pad_0, pad_type = key_states_151_pad_type_0, strides = key_states_151_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_12115_cast_fp16)[name = string("key_states_151")]; + string value_states_121_pad_type_0 = const()[name = string("value_states_121_pad_type_0"), val = string("valid")]; + tensor value_states_121_strides_0 = const()[name = string("value_states_121_strides_0"), val = tensor([1, 1])]; + tensor value_states_121_pad_0 = const()[name = string("value_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_121_dilations_0 = const()[name = string("value_states_121_dilations_0"), val = tensor([1, 1])]; + int32 value_states_121_groups_0 = const()[name = string("value_states_121_groups_0"), val = int32(1)]; + tensor value_states_121 = conv(dilations = value_states_121_dilations_0, groups = value_states_121_groups_0, pad = value_states_121_pad_0, pad_type = value_states_121_pad_type_0, strides = value_states_121_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_12115_cast_fp16)[name = string("value_states_121")]; + tensor var_12157 = const()[name = string("op_12157"), val = tensor([1, 4, 256, 64])]; + tensor var_12158 = reshape(shape = var_12157, x = query_states_121)[name = string("op_12158")]; + tensor var_12163 = const()[name = string("op_12163"), val = tensor([0, 1, 3, 2])]; + tensor var_12168 = const()[name = string("op_12168"), val = tensor([1, 1, 256, 64])]; + tensor var_12169 = reshape(shape = var_12168, x = key_states_151)[name = string("op_12169")]; + tensor var_12174 = const()[name = string("op_12174"), val = tensor([0, 1, 3, 2])]; + tensor var_12179 = const()[name = string("op_12179"), val = tensor([1, 1, 256, 64])]; + tensor var_12180 = reshape(shape = var_12179, x = value_states_121)[name = string("op_12180")]; + tensor var_12185 = const()[name = string("op_12185"), val = tensor([0, 1, 3, 2])]; + int32 var_12196 = const()[name = string("op_12196"), val = int32(-1)]; + fp16 const_651_promoted = const()[name = string("const_651_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_249 = transpose(perm = var_12163, x = var_12158)[name = string("transpose_97")]; + tensor var_12198 = mul(x = hidden_states_249, y = const_651_promoted)[name = string("op_12198")]; + bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; + tensor input_305 = concat(axis = var_12196, interleave = input_305_interleave_0, values = (hidden_states_249, var_12198))[name = string("input_305")]; + tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; + fp16 var_12193_to_fp16 = const()[name = string("op_12193_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_12193_to_fp16, x = input_305)[name = string("normed_365_cast_fp16")]; + tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_367 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367")]; + tensor var_12212_to_fp16 = const()[name = string("op_12212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678035072)))]; + tensor q_31_cast_fp16 = mul(x = normed_367, y = var_12212_to_fp16)[name = string("q_31_cast_fp16")]; + int32 var_12223 = const()[name = string("op_12223"), val = int32(-1)]; + fp16 const_655_promoted = const()[name = string("const_655_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_251 = transpose(perm = var_12174, x = var_12169)[name = string("transpose_96")]; + tensor var_12225 = mul(x = hidden_states_251, y = const_655_promoted)[name = string("op_12225")]; + bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; + tensor input_307 = concat(axis = var_12223, interleave = input_307_interleave_0, values = (hidden_states_251, var_12225))[name = string("input_307")]; + tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; + fp16 var_12220_to_fp16 = const()[name = string("op_12220_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_12220_to_fp16, x = input_307)[name = string("normed_369_cast_fp16")]; + tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_371 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371")]; + tensor var_12239_to_fp16 = const()[name = string("op_12239_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678035648)))]; + tensor k_31_cast_fp16 = mul(x = normed_371, y = var_12239_to_fp16)[name = string("k_31_cast_fp16")]; + tensor var_12253_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_5)[name = string("op_12253_cast_fp16")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; + fp16 const_661_promoted_to_fp16 = const()[name = string("const_661_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12274_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_661_promoted_to_fp16)[name = string("op_12274_cast_fp16")]; + int32 var_12276 = const()[name = string("op_12276"), val = int32(-1)]; + bool var_12277_interleave_0 = const()[name = string("op_12277_interleave_0"), val = bool(false)]; + tensor var_12277_cast_fp16 = concat(axis = var_12276, interleave = var_12277_interleave_0, values = (var_12274_cast_fp16, x1_61_cast_fp16))[name = string("op_12277_cast_fp16")]; + tensor var_12278_cast_fp16 = mul(x = var_12277_cast_fp16, y = sin_5)[name = string("op_12278_cast_fp16")]; + tensor query_states_123_cast_fp16 = add(x = var_12253_cast_fp16, y = var_12278_cast_fp16)[name = string("query_states_123_cast_fp16")]; + tensor var_12281_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_5)[name = string("op_12281_cast_fp16")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; + fp16 const_664_promoted_to_fp16 = const()[name = string("const_664_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12302_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_664_promoted_to_fp16)[name = string("op_12302_cast_fp16")]; + int32 var_12304 = const()[name = string("op_12304"), val = int32(-1)]; + bool var_12305_interleave_0 = const()[name = string("op_12305_interleave_0"), val = bool(false)]; + tensor var_12305_cast_fp16 = concat(axis = var_12304, interleave = var_12305_interleave_0, values = (var_12302_cast_fp16, x1_63_cast_fp16))[name = string("op_12305_cast_fp16")]; + tensor var_12306_cast_fp16 = mul(x = var_12305_cast_fp16, y = sin_5)[name = string("op_12306_cast_fp16")]; + tensor key_states_153_cast_fp16 = add(x = var_12281_cast_fp16, y = var_12306_cast_fp16)[name = string("key_states_153_cast_fp16")]; + tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([13])]; + tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; + tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; + tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([14])]; + int32 concat_272_axis_0 = const()[name = string("concat_272_axis_0"), val = int32(0)]; + bool concat_272_interleave_0 = const()[name = string("concat_272_interleave_0"), val = bool(false)]; + tensor concat_272 = concat(axis = concat_272_axis_0, interleave = concat_272_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_272")]; + tensor concat_273_values1_0 = const()[name = string("concat_273_values1_0"), val = tensor([0])]; + tensor concat_273_values3_0 = const()[name = string("concat_273_values3_0"), val = tensor([0])]; + int32 concat_273_axis_0 = const()[name = string("concat_273_axis_0"), val = int32(0)]; + bool concat_273_interleave_0 = const()[name = string("concat_273_interleave_0"), val = bool(false)]; + tensor concat_273 = concat(axis = concat_273_axis_0, interleave = concat_273_interleave_0, values = (expand_dims_184, concat_273_values1_0, end_pos_1, concat_273_values3_0))[name = string("concat_273")]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_272, begin_mask = model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0, end = concat_273, end_mask = model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_27_stride_0, update = key_states_153_cast_fp16, x = coreml_update_state_81)[name = string("model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_134_write_state")]; + tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_134")]; + tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([35])]; + tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; + tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; + tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([36])]; + int32 concat_276_axis_0 = const()[name = string("concat_276_axis_0"), val = int32(0)]; + bool concat_276_interleave_0 = const()[name = string("concat_276_interleave_0"), val = bool(false)]; + tensor concat_276 = concat(axis = concat_276_axis_0, interleave = concat_276_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_276")]; + tensor concat_277_values1_0 = const()[name = string("concat_277_values1_0"), val = tensor([0])]; + tensor concat_277_values3_0 = const()[name = string("concat_277_values3_0"), val = tensor([0])]; + int32 concat_277_axis_0 = const()[name = string("concat_277_axis_0"), val = int32(0)]; + bool concat_277_interleave_0 = const()[name = string("concat_277_interleave_0"), val = bool(false)]; + tensor concat_277 = concat(axis = concat_277_axis_0, interleave = concat_277_interleave_0, values = (expand_dims_190, concat_277_values1_0, end_pos_1, concat_277_values3_0))[name = string("concat_277")]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_123 = transpose(perm = var_12185, x = var_12180)[name = string("transpose_95")]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_276, begin_mask = model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0, end = concat_277, end_mask = model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_28_stride_0, update = value_states_123, x = coreml_update_state_82)[name = string("model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_135_write_state")]; + tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_135")]; + tensor var_12405_begin_0 = const()[name = string("op_12405_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor var_12405_end_0 = const()[name = string("op_12405_end_0"), val = tensor([14, 1, 512, 256])]; + tensor var_12405_end_mask_0 = const()[name = string("op_12405_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12405_cast_fp16 = slice_by_index(begin = var_12405_begin_0, end = var_12405_end_0, end_mask = var_12405_end_mask_0, x = coreml_update_state_83)[name = string("op_12405_cast_fp16")]; + tensor var_12412_begin_0 = const()[name = string("op_12412_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_12412_end_0 = const()[name = string("op_12412_end_0"), val = tensor([36, 1, 512, 256])]; + tensor var_12412_end_mask_0 = const()[name = string("op_12412_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12412_cast_fp16 = slice_by_index(begin = var_12412_begin_0, end = var_12412_end_0, end_mask = var_12412_end_mask_0, x = coreml_update_state_83)[name = string("op_12412_cast_fp16")]; + tensor var_12451 = const()[name = string("op_12451"), val = tensor([1, 4, 1, 1])]; + tensor x_245_cast_fp16 = tile(reps = var_12451, x = var_12405_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_12471 = const()[name = string("op_12471"), val = tensor([1, 4, 1, 1])]; + tensor x_251_cast_fp16 = tile(reps = var_12471, x = var_12412_cast_fp16)[name = string("x_251_cast_fp16")]; + bool var_12498_transpose_x_0 = const()[name = string("op_12498_transpose_x_0"), val = bool(false)]; + bool var_12498_transpose_y_0 = const()[name = string("op_12498_transpose_y_0"), val = bool(true)]; + tensor var_12498 = matmul(transpose_x = var_12498_transpose_x_0, transpose_y = var_12498_transpose_y_0, x = query_states_123_cast_fp16, y = x_245_cast_fp16)[name = string("op_12498")]; + fp16 var_12499_to_fp16 = const()[name = string("op_12499_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_61_cast_fp16 = mul(x = var_12498, y = var_12499_to_fp16)[name = string("attn_weights_61_cast_fp16")]; + tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = mask_slice_1)[name = string("attn_weights_63_cast_fp16")]; + int32 var_12534 = const()[name = string("op_12534"), val = int32(-1)]; + tensor var_12536_cast_fp16 = softmax(axis = var_12534, x = attn_weights_63_cast_fp16)[name = string("op_12536_cast_fp16")]; + tensor concat_282 = const()[name = string("concat_282"), val = tensor([4, 64, 512])]; + tensor reshape_45_cast_fp16 = reshape(shape = concat_282, x = var_12536_cast_fp16)[name = string("reshape_45_cast_fp16")]; + tensor concat_283 = const()[name = string("concat_283"), val = tensor([4, 512, 256])]; + tensor reshape_46_cast_fp16 = reshape(shape = concat_283, x = x_251_cast_fp16)[name = string("reshape_46_cast_fp16")]; + bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; + bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; + tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; + tensor concat_287 = const()[name = string("concat_287"), val = tensor([1, 4, 64, 256])]; + tensor reshape_47_cast_fp16 = reshape(shape = concat_287, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; + tensor var_12548_perm_0 = const()[name = string("op_12548_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12567 = const()[name = string("op_12567"), val = tensor([1, 64, 1024])]; + tensor var_12548_cast_fp16 = transpose(perm = var_12548_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_94")]; + tensor attn_output_155_cast_fp16 = reshape(shape = var_12567, x = var_12548_cast_fp16)[name = string("attn_output_155_cast_fp16")]; + tensor var_12572 = const()[name = string("op_12572"), val = tensor([0, 2, 1])]; + string var_12588_pad_type_0 = const()[name = string("op_12588_pad_type_0"), val = string("valid")]; + int32 var_12588_groups_0 = const()[name = string("op_12588_groups_0"), val = int32(1)]; + tensor var_12588_strides_0 = const()[name = string("op_12588_strides_0"), val = tensor([1])]; + tensor var_12588_pad_0 = const()[name = string("op_12588_pad_0"), val = tensor([0, 0])]; + tensor var_12588_dilations_0 = const()[name = string("op_12588_dilations_0"), val = tensor([1])]; + tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678036224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678921024))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12573_cast_fp16 = transpose(perm = var_12572, x = attn_output_155_cast_fp16)[name = string("transpose_93")]; + tensor var_12588_cast_fp16 = conv(dilations = var_12588_dilations_0, groups = var_12588_groups_0, pad = var_12588_pad_0, pad_type = var_12588_pad_type_0, strides = var_12588_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_12573_cast_fp16)[name = string("op_12588_cast_fp16")]; + tensor var_12592 = const()[name = string("op_12592"), val = tensor([0, 2, 1])]; + int32 var_12603 = const()[name = string("op_12603"), val = int32(-1)]; + fp16 const_676_promoted_to_fp16 = const()[name = string("const_676_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_253_cast_fp16 = transpose(perm = var_12592, x = var_12588_cast_fp16)[name = string("transpose_92")]; + tensor var_12605_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = const_676_promoted_to_fp16)[name = string("op_12605_cast_fp16")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311_cast_fp16 = concat(axis = var_12603, interleave = input_311_interleave_0, values = (hidden_states_253_cast_fp16, var_12605_cast_fp16))[name = string("input_311_cast_fp16")]; + tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; + fp16 var_12600_to_fp16 = const()[name = string("op_12600_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_12600_to_fp16, x = input_311_cast_fp16)[name = string("normed_373_cast_fp16")]; + tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_375_cast_fp16 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375_cast_fp16")]; + tensor var_12619_to_fp16 = const()[name = string("op_12619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678957952)))]; + tensor attn_output_159_cast_fp16 = mul(x = normed_375_cast_fp16, y = var_12619_to_fp16)[name = string("attn_output_159_cast_fp16")]; + tensor hidden_states_255_cast_fp16 = add(x = hidden_states_245_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; + int32 var_12632 = const()[name = string("op_12632"), val = int32(-1)]; + fp16 const_680_promoted_to_fp16 = const()[name = string("const_680_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12634_cast_fp16 = mul(x = hidden_states_255_cast_fp16, y = const_680_promoted_to_fp16)[name = string("op_12634_cast_fp16")]; + bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; + tensor input_313_cast_fp16 = concat(axis = var_12632, interleave = input_313_interleave_0, values = (hidden_states_255_cast_fp16, var_12634_cast_fp16))[name = string("input_313_cast_fp16")]; + tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; + fp16 var_12629_to_fp16 = const()[name = string("op_12629_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_12629_to_fp16, x = input_313_cast_fp16)[name = string("normed_377_cast_fp16")]; + tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_379_cast_fp16 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379_cast_fp16")]; + tensor var_12648_to_fp16 = const()[name = string("op_12648_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678960320)))]; + tensor x_253_cast_fp16 = mul(x = normed_379_cast_fp16, y = var_12648_to_fp16)[name = string("x_253_cast_fp16")]; + tensor var_12660 = const()[name = string("op_12660"), val = tensor([0, 2, 1])]; + tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; + tensor var_12661_cast_fp16 = transpose(perm = var_12660, x = x_253_cast_fp16)[name = string("transpose_91")]; + tensor input_315_cast_fp16 = expand_dims(axes = input_315_axes_0, x = var_12661_cast_fp16)[name = string("input_315_cast_fp16")]; + string x_255_pad_type_0 = const()[name = string("x_255_pad_type_0"), val = string("valid")]; + tensor x_255_strides_0 = const()[name = string("x_255_strides_0"), val = tensor([1, 1])]; + tensor x_255_pad_0 = const()[name = string("x_255_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_255_dilations_0 = const()[name = string("x_255_dilations_0"), val = tensor([1, 1])]; + int32 x_255_groups_0 = const()[name = string("x_255_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678962688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(684934720))))[name = string("model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_255_cast_fp16 = conv(dilations = x_255_dilations_0, groups = x_255_groups_0, pad = x_255_pad_0, pad_type = x_255_pad_type_0, strides = x_255_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("x_255_cast_fp16")]; + string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; + tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; + tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; + int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685155968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(691128000))))[name = string("model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_31_cast_fp16 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("b_31_cast_fp16")]; + string var_12686_mode_0 = const()[name = string("op_12686_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_12686_cast_fp16 = gelu(mode = var_12686_mode_0, x = x_255_cast_fp16)[name = string("op_12686_cast_fp16")]; + tensor input_317_cast_fp16 = mul(x = var_12686_cast_fp16, y = b_31_cast_fp16)[name = string("input_317_cast_fp16")]; + string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; + tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; + tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; + int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(691349248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697321280))))[name = string("model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_31_cast_fp16 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_317_cast_fp16)[name = string("e_31_cast_fp16")]; + tensor var_12694_axes_0 = const()[name = string("op_12694_axes_0"), val = tensor([2])]; + tensor var_12694_cast_fp16 = squeeze(axes = var_12694_axes_0, x = e_31_cast_fp16)[name = string("op_12694_cast_fp16")]; + tensor var_12695 = const()[name = string("op_12695"), val = tensor([0, 2, 1])]; + int32 var_12706 = const()[name = string("op_12706"), val = int32(-1)]; + fp16 const_684_promoted_to_fp16 = const()[name = string("const_684_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_257_cast_fp16 = transpose(perm = var_12695, x = var_12694_cast_fp16)[name = string("transpose_90")]; + tensor var_12708_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = const_684_promoted_to_fp16)[name = string("op_12708_cast_fp16")]; + bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; + tensor input_319_cast_fp16 = concat(axis = var_12706, interleave = input_319_interleave_0, values = (hidden_states_257_cast_fp16, var_12708_cast_fp16))[name = string("input_319_cast_fp16")]; + tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; + fp16 var_12703_to_fp16 = const()[name = string("op_12703_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_12703_to_fp16, x = input_319_cast_fp16)[name = string("normed_381_cast_fp16")]; + tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; + tensor var_12722_to_fp16 = const()[name = string("op_12722_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697358208)))]; + tensor hidden_states_259_cast_fp16 = mul(x = normed_383_cast_fp16, y = var_12722_to_fp16)[name = string("hidden_states_259_cast_fp16")]; + tensor hidden_states_261_cast_fp16 = add(x = hidden_states_255_cast_fp16, y = hidden_states_259_cast_fp16)[name = string("hidden_states_261_cast_fp16")]; + int32 var_12776 = const()[name = string("op_12776"), val = int32(-1)]; + fp16 const_689_promoted_to_fp16 = const()[name = string("const_689_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12778_cast_fp16 = mul(x = hidden_states_261_cast_fp16, y = const_689_promoted_to_fp16)[name = string("op_12778_cast_fp16")]; + bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; + tensor input_321_cast_fp16 = concat(axis = var_12776, interleave = input_321_interleave_0, values = (hidden_states_261_cast_fp16, var_12778_cast_fp16))[name = string("input_321_cast_fp16")]; + tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; + fp16 var_12773_to_fp16 = const()[name = string("op_12773_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_12773_to_fp16, x = input_321_cast_fp16)[name = string("normed_385_cast_fp16")]; + tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; + tensor var_12792_to_fp16 = const()[name = string("op_12792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697360576)))]; + tensor hidden_states_263_cast_fp16 = mul(x = normed_387_cast_fp16, y = var_12792_to_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor var_12803 = const()[name = string("op_12803"), val = tensor([0, 2, 1])]; + tensor var_12806_axes_0 = const()[name = string("op_12806_axes_0"), val = tensor([2])]; + tensor var_12804_cast_fp16 = transpose(perm = var_12803, x = hidden_states_263_cast_fp16)[name = string("transpose_89")]; + tensor var_12806_cast_fp16 = expand_dims(axes = var_12806_axes_0, x = var_12804_cast_fp16)[name = string("op_12806_cast_fp16")]; + string query_states_129_pad_type_0 = const()[name = string("query_states_129_pad_type_0"), val = string("valid")]; + tensor query_states_129_strides_0 = const()[name = string("query_states_129_strides_0"), val = tensor([1, 1])]; + tensor query_states_129_pad_0 = const()[name = string("query_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_129_dilations_0 = const()[name = string("query_states_129_dilations_0"), val = tensor([1, 1])]; + int32 query_states_129_groups_0 = const()[name = string("query_states_129_groups_0"), val = int32(1)]; + tensor query_states_129 = conv(dilations = query_states_129_dilations_0, groups = query_states_129_groups_0, pad = query_states_129_pad_0, pad_type = query_states_129_pad_type_0, strides = query_states_129_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_12806_cast_fp16)[name = string("query_states_129")]; + string key_states_161_pad_type_0 = const()[name = string("key_states_161_pad_type_0"), val = string("valid")]; + tensor key_states_161_strides_0 = const()[name = string("key_states_161_strides_0"), val = tensor([1, 1])]; + tensor key_states_161_pad_0 = const()[name = string("key_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_161_dilations_0 = const()[name = string("key_states_161_dilations_0"), val = tensor([1, 1])]; + int32 key_states_161_groups_0 = const()[name = string("key_states_161_groups_0"), val = int32(1)]; + tensor key_states_161 = conv(dilations = key_states_161_dilations_0, groups = key_states_161_groups_0, pad = key_states_161_pad_0, pad_type = key_states_161_pad_type_0, strides = key_states_161_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_12806_cast_fp16)[name = string("key_states_161")]; + string value_states_129_pad_type_0 = const()[name = string("value_states_129_pad_type_0"), val = string("valid")]; + tensor value_states_129_strides_0 = const()[name = string("value_states_129_strides_0"), val = tensor([1, 1])]; + tensor value_states_129_pad_0 = const()[name = string("value_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_129_dilations_0 = const()[name = string("value_states_129_dilations_0"), val = tensor([1, 1])]; + int32 value_states_129_groups_0 = const()[name = string("value_states_129_groups_0"), val = int32(1)]; + tensor value_states_129 = conv(dilations = value_states_129_dilations_0, groups = value_states_129_groups_0, pad = value_states_129_pad_0, pad_type = value_states_129_pad_type_0, strides = value_states_129_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_12806_cast_fp16)[name = string("value_states_129")]; + tensor var_12848 = const()[name = string("op_12848"), val = tensor([1, 4, 256, 64])]; + tensor var_12849 = reshape(shape = var_12848, x = query_states_129)[name = string("op_12849")]; + tensor var_12854 = const()[name = string("op_12854"), val = tensor([0, 1, 3, 2])]; + tensor var_12859 = const()[name = string("op_12859"), val = tensor([1, 1, 256, 64])]; + tensor var_12860 = reshape(shape = var_12859, x = key_states_161)[name = string("op_12860")]; + tensor var_12865 = const()[name = string("op_12865"), val = tensor([0, 1, 3, 2])]; + tensor var_12870 = const()[name = string("op_12870"), val = tensor([1, 1, 256, 64])]; + tensor var_12871 = reshape(shape = var_12870, x = value_states_129)[name = string("op_12871")]; + tensor var_12876 = const()[name = string("op_12876"), val = tensor([0, 1, 3, 2])]; + int32 var_12887 = const()[name = string("op_12887"), val = int32(-1)]; + fp16 const_694_promoted = const()[name = string("const_694_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_265 = transpose(perm = var_12854, x = var_12849)[name = string("transpose_88")]; + tensor var_12889 = mul(x = hidden_states_265, y = const_694_promoted)[name = string("op_12889")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325 = concat(axis = var_12887, interleave = input_325_interleave_0, values = (hidden_states_265, var_12889))[name = string("input_325")]; + tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; + fp16 var_12884_to_fp16 = const()[name = string("op_12884_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_12884_to_fp16, x = input_325)[name = string("normed_389_cast_fp16")]; + tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; + tensor var_12903_to_fp16 = const()[name = string("op_12903_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697362944)))]; + tensor q_33_cast_fp16 = mul(x = normed_391, y = var_12903_to_fp16)[name = string("q_33_cast_fp16")]; + int32 var_12914 = const()[name = string("op_12914"), val = int32(-1)]; + fp16 const_698_promoted = const()[name = string("const_698_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_267 = transpose(perm = var_12865, x = var_12860)[name = string("transpose_87")]; + tensor var_12916 = mul(x = hidden_states_267, y = const_698_promoted)[name = string("op_12916")]; + bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; + tensor input_327 = concat(axis = var_12914, interleave = input_327_interleave_0, values = (hidden_states_267, var_12916))[name = string("input_327")]; + tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; + fp16 var_12911_to_fp16 = const()[name = string("op_12911_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_12911_to_fp16, x = input_327)[name = string("normed_393_cast_fp16")]; + tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; + tensor var_12930_to_fp16 = const()[name = string("op_12930_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697363520)))]; + tensor k_33_cast_fp16 = mul(x = normed_395, y = var_12930_to_fp16)[name = string("k_33_cast_fp16")]; + tensor var_12944_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_5)[name = string("op_12944_cast_fp16")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; + fp16 const_704_promoted_to_fp16 = const()[name = string("const_704_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12965_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_704_promoted_to_fp16)[name = string("op_12965_cast_fp16")]; + int32 var_12967 = const()[name = string("op_12967"), val = int32(-1)]; + bool var_12968_interleave_0 = const()[name = string("op_12968_interleave_0"), val = bool(false)]; + tensor var_12968_cast_fp16 = concat(axis = var_12967, interleave = var_12968_interleave_0, values = (var_12965_cast_fp16, x1_65_cast_fp16))[name = string("op_12968_cast_fp16")]; + tensor var_12969_cast_fp16 = mul(x = var_12968_cast_fp16, y = sin_5)[name = string("op_12969_cast_fp16")]; + tensor query_states_131_cast_fp16 = add(x = var_12944_cast_fp16, y = var_12969_cast_fp16)[name = string("query_states_131_cast_fp16")]; + tensor var_12972_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_5)[name = string("op_12972_cast_fp16")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; + fp16 const_707_promoted_to_fp16 = const()[name = string("const_707_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12993_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_707_promoted_to_fp16)[name = string("op_12993_cast_fp16")]; + int32 var_12995 = const()[name = string("op_12995"), val = int32(-1)]; + bool var_12996_interleave_0 = const()[name = string("op_12996_interleave_0"), val = bool(false)]; + tensor var_12996_cast_fp16 = concat(axis = var_12995, interleave = var_12996_interleave_0, values = (var_12993_cast_fp16, x1_67_cast_fp16))[name = string("op_12996_cast_fp16")]; + tensor var_12997_cast_fp16 = mul(x = var_12996_cast_fp16, y = sin_5)[name = string("op_12997_cast_fp16")]; + tensor key_states_163_cast_fp16 = add(x = var_12972_cast_fp16, y = var_12997_cast_fp16)[name = string("key_states_163_cast_fp16")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([14])]; + tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; + tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; + tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([15])]; + int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; + bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; + tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_290")]; + tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; + tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; + int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; + bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; + tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (expand_dims_196, concat_291_values1_0, end_pos_1, concat_291_values3_0))[name = string("concat_291")]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_290, begin_mask = model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0, end = concat_291, end_mask = model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_29_stride_0, update = key_states_163_cast_fp16, x = coreml_update_state_83)[name = string("model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_136_write_state")]; + tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_136")]; + tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([36])]; + tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; + tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; + tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([37])]; + int32 concat_294_axis_0 = const()[name = string("concat_294_axis_0"), val = int32(0)]; + bool concat_294_interleave_0 = const()[name = string("concat_294_interleave_0"), val = bool(false)]; + tensor concat_294 = concat(axis = concat_294_axis_0, interleave = concat_294_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_294")]; + tensor concat_295_values1_0 = const()[name = string("concat_295_values1_0"), val = tensor([0])]; + tensor concat_295_values3_0 = const()[name = string("concat_295_values3_0"), val = tensor([0])]; + int32 concat_295_axis_0 = const()[name = string("concat_295_axis_0"), val = int32(0)]; + bool concat_295_interleave_0 = const()[name = string("concat_295_interleave_0"), val = bool(false)]; + tensor concat_295 = concat(axis = concat_295_axis_0, interleave = concat_295_interleave_0, values = (expand_dims_202, concat_295_values1_0, end_pos_1, concat_295_values3_0))[name = string("concat_295")]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_131 = transpose(perm = var_12876, x = var_12871)[name = string("transpose_86")]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_294, begin_mask = model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0, end = concat_295, end_mask = model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_30_stride_0, update = value_states_131, x = coreml_update_state_84)[name = string("model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_137_write_state")]; + tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_137")]; + tensor var_13096_begin_0 = const()[name = string("op_13096_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor var_13096_end_0 = const()[name = string("op_13096_end_0"), val = tensor([15, 1, 512, 256])]; + tensor var_13096_end_mask_0 = const()[name = string("op_13096_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13096_cast_fp16 = slice_by_index(begin = var_13096_begin_0, end = var_13096_end_0, end_mask = var_13096_end_mask_0, x = coreml_update_state_85)[name = string("op_13096_cast_fp16")]; + tensor var_13103_begin_0 = const()[name = string("op_13103_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor var_13103_end_0 = const()[name = string("op_13103_end_0"), val = tensor([37, 1, 512, 256])]; + tensor var_13103_end_mask_0 = const()[name = string("op_13103_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13103_cast_fp16 = slice_by_index(begin = var_13103_begin_0, end = var_13103_end_0, end_mask = var_13103_end_mask_0, x = coreml_update_state_85)[name = string("op_13103_cast_fp16")]; + tensor var_13142 = const()[name = string("op_13142"), val = tensor([1, 4, 1, 1])]; + tensor x_261_cast_fp16 = tile(reps = var_13142, x = var_13096_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_13162 = const()[name = string("op_13162"), val = tensor([1, 4, 1, 1])]; + tensor x_267_cast_fp16 = tile(reps = var_13162, x = var_13103_cast_fp16)[name = string("x_267_cast_fp16")]; + bool var_13189_transpose_x_0 = const()[name = string("op_13189_transpose_x_0"), val = bool(false)]; + bool var_13189_transpose_y_0 = const()[name = string("op_13189_transpose_y_0"), val = bool(true)]; + tensor var_13189 = matmul(transpose_x = var_13189_transpose_x_0, transpose_y = var_13189_transpose_y_0, x = query_states_131_cast_fp16, y = x_261_cast_fp16)[name = string("op_13189")]; + fp16 var_13190_to_fp16 = const()[name = string("op_13190_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_65_cast_fp16 = mul(x = var_13189, y = var_13190_to_fp16)[name = string("attn_weights_65_cast_fp16")]; + tensor attn_weights_67_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = mask_slice_1)[name = string("attn_weights_67_cast_fp16")]; + int32 var_13225 = const()[name = string("op_13225"), val = int32(-1)]; + tensor var_13227_cast_fp16 = softmax(axis = var_13225, x = attn_weights_67_cast_fp16)[name = string("op_13227_cast_fp16")]; + tensor concat_300 = const()[name = string("concat_300"), val = tensor([4, 64, 512])]; + tensor reshape_48_cast_fp16 = reshape(shape = concat_300, x = var_13227_cast_fp16)[name = string("reshape_48_cast_fp16")]; + tensor concat_301 = const()[name = string("concat_301"), val = tensor([4, 512, 256])]; + tensor reshape_49_cast_fp16 = reshape(shape = concat_301, x = x_267_cast_fp16)[name = string("reshape_49_cast_fp16")]; + bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; + bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(false)]; + tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = reshape_48_cast_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; + tensor concat_305 = const()[name = string("concat_305"), val = tensor([1, 4, 64, 256])]; + tensor reshape_50_cast_fp16 = reshape(shape = concat_305, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; + tensor var_13239_perm_0 = const()[name = string("op_13239_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13258 = const()[name = string("op_13258"), val = tensor([1, 64, 1024])]; + tensor var_13239_cast_fp16 = transpose(perm = var_13239_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_85")]; + tensor attn_output_165_cast_fp16 = reshape(shape = var_13258, x = var_13239_cast_fp16)[name = string("attn_output_165_cast_fp16")]; + tensor var_13263 = const()[name = string("op_13263"), val = tensor([0, 2, 1])]; + string var_13279_pad_type_0 = const()[name = string("op_13279_pad_type_0"), val = string("valid")]; + int32 var_13279_groups_0 = const()[name = string("op_13279_groups_0"), val = int32(1)]; + tensor var_13279_strides_0 = const()[name = string("op_13279_strides_0"), val = tensor([1])]; + tensor var_13279_pad_0 = const()[name = string("op_13279_pad_0"), val = tensor([0, 0])]; + tensor var_13279_dilations_0 = const()[name = string("op_13279_dilations_0"), val = tensor([1])]; + tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697364096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698248896))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13264_cast_fp16 = transpose(perm = var_13263, x = attn_output_165_cast_fp16)[name = string("transpose_84")]; + tensor var_13279_cast_fp16 = conv(dilations = var_13279_dilations_0, groups = var_13279_groups_0, pad = var_13279_pad_0, pad_type = var_13279_pad_type_0, strides = var_13279_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_13264_cast_fp16)[name = string("op_13279_cast_fp16")]; + tensor var_13283 = const()[name = string("op_13283"), val = tensor([0, 2, 1])]; + int32 var_13294 = const()[name = string("op_13294"), val = int32(-1)]; + fp16 const_719_promoted_to_fp16 = const()[name = string("const_719_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_269_cast_fp16 = transpose(perm = var_13283, x = var_13279_cast_fp16)[name = string("transpose_83")]; + tensor var_13296_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_719_promoted_to_fp16)[name = string("op_13296_cast_fp16")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331_cast_fp16 = concat(axis = var_13294, interleave = input_331_interleave_0, values = (hidden_states_269_cast_fp16, var_13296_cast_fp16))[name = string("input_331_cast_fp16")]; + tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; + fp16 var_13291_to_fp16 = const()[name = string("op_13291_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_13291_to_fp16, x = input_331_cast_fp16)[name = string("normed_397_cast_fp16")]; + tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; + tensor var_13310_to_fp16 = const()[name = string("op_13310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698285824)))]; + tensor attn_output_169_cast_fp16 = mul(x = normed_399_cast_fp16, y = var_13310_to_fp16)[name = string("attn_output_169_cast_fp16")]; + tensor hidden_states_271_cast_fp16 = add(x = hidden_states_261_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_271_cast_fp16")]; + int32 var_13323 = const()[name = string("op_13323"), val = int32(-1)]; + fp16 const_723_promoted_to_fp16 = const()[name = string("const_723_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13325_cast_fp16 = mul(x = hidden_states_271_cast_fp16, y = const_723_promoted_to_fp16)[name = string("op_13325_cast_fp16")]; + bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; + tensor input_333_cast_fp16 = concat(axis = var_13323, interleave = input_333_interleave_0, values = (hidden_states_271_cast_fp16, var_13325_cast_fp16))[name = string("input_333_cast_fp16")]; + tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; + fp16 var_13320_to_fp16 = const()[name = string("op_13320_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_13320_to_fp16, x = input_333_cast_fp16)[name = string("normed_401_cast_fp16")]; + tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; + tensor var_13339_to_fp16 = const()[name = string("op_13339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698288192)))]; + tensor x_269_cast_fp16 = mul(x = normed_403_cast_fp16, y = var_13339_to_fp16)[name = string("x_269_cast_fp16")]; + tensor var_13351 = const()[name = string("op_13351"), val = tensor([0, 2, 1])]; + tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; + tensor var_13352_cast_fp16 = transpose(perm = var_13351, x = x_269_cast_fp16)[name = string("transpose_82")]; + tensor input_335_cast_fp16 = expand_dims(axes = input_335_axes_0, x = var_13352_cast_fp16)[name = string("input_335_cast_fp16")]; + string x_271_pad_type_0 = const()[name = string("x_271_pad_type_0"), val = string("valid")]; + tensor x_271_strides_0 = const()[name = string("x_271_strides_0"), val = tensor([1, 1])]; + tensor x_271_pad_0 = const()[name = string("x_271_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_271_dilations_0 = const()[name = string("x_271_dilations_0"), val = tensor([1, 1])]; + int32 x_271_groups_0 = const()[name = string("x_271_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698290560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704262592))))[name = string("model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_271_cast_fp16 = conv(dilations = x_271_dilations_0, groups = x_271_groups_0, pad = x_271_pad_0, pad_type = x_271_pad_type_0, strides = x_271_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("x_271_cast_fp16")]; + string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; + tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; + tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; + int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704483840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710455872))))[name = string("model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_33_cast_fp16 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("b_33_cast_fp16")]; + string var_13377_mode_0 = const()[name = string("op_13377_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_13377_cast_fp16 = gelu(mode = var_13377_mode_0, x = x_271_cast_fp16)[name = string("op_13377_cast_fp16")]; + tensor input_337_cast_fp16 = mul(x = var_13377_cast_fp16, y = b_33_cast_fp16)[name = string("input_337_cast_fp16")]; + string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; + tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; + tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; + int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710677120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716649152))))[name = string("model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_33_cast_fp16 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_337_cast_fp16)[name = string("e_33_cast_fp16")]; + tensor var_13385_axes_0 = const()[name = string("op_13385_axes_0"), val = tensor([2])]; + tensor var_13385_cast_fp16 = squeeze(axes = var_13385_axes_0, x = e_33_cast_fp16)[name = string("op_13385_cast_fp16")]; + tensor var_13386 = const()[name = string("op_13386"), val = tensor([0, 2, 1])]; + int32 var_13397 = const()[name = string("op_13397"), val = int32(-1)]; + fp16 const_727_promoted_to_fp16 = const()[name = string("const_727_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_273_cast_fp16 = transpose(perm = var_13386, x = var_13385_cast_fp16)[name = string("transpose_81")]; + tensor var_13399_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = const_727_promoted_to_fp16)[name = string("op_13399_cast_fp16")]; + bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; + tensor input_339_cast_fp16 = concat(axis = var_13397, interleave = input_339_interleave_0, values = (hidden_states_273_cast_fp16, var_13399_cast_fp16))[name = string("input_339_cast_fp16")]; + tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; + fp16 var_13394_to_fp16 = const()[name = string("op_13394_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_13394_to_fp16, x = input_339_cast_fp16)[name = string("normed_405_cast_fp16")]; + tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_407_cast_fp16 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407_cast_fp16")]; + tensor var_13413_to_fp16 = const()[name = string("op_13413_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716686080)))]; + tensor hidden_states_275_cast_fp16 = mul(x = normed_407_cast_fp16, y = var_13413_to_fp16)[name = string("hidden_states_275_cast_fp16")]; + tensor hidden_states_277_cast_fp16 = add(x = hidden_states_271_cast_fp16, y = hidden_states_275_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; + int32 var_13467 = const()[name = string("op_13467"), val = int32(-1)]; + fp16 const_732_promoted_to_fp16 = const()[name = string("const_732_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13469_cast_fp16 = mul(x = hidden_states_277_cast_fp16, y = const_732_promoted_to_fp16)[name = string("op_13469_cast_fp16")]; + bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; + tensor input_341_cast_fp16 = concat(axis = var_13467, interleave = input_341_interleave_0, values = (hidden_states_277_cast_fp16, var_13469_cast_fp16))[name = string("input_341_cast_fp16")]; + tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; + fp16 var_13464_to_fp16 = const()[name = string("op_13464_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_13464_to_fp16, x = input_341_cast_fp16)[name = string("normed_409_cast_fp16")]; + tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_411_cast_fp16 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411_cast_fp16")]; + tensor var_13483_to_fp16 = const()[name = string("op_13483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716688448)))]; + tensor hidden_states_279_cast_fp16 = mul(x = normed_411_cast_fp16, y = var_13483_to_fp16)[name = string("hidden_states_279_cast_fp16")]; + tensor var_13494 = const()[name = string("op_13494"), val = tensor([0, 2, 1])]; + tensor var_13497_axes_0 = const()[name = string("op_13497_axes_0"), val = tensor([2])]; + tensor var_13495_cast_fp16 = transpose(perm = var_13494, x = hidden_states_279_cast_fp16)[name = string("transpose_80")]; + tensor var_13497_cast_fp16 = expand_dims(axes = var_13497_axes_0, x = var_13495_cast_fp16)[name = string("op_13497_cast_fp16")]; + string query_states_137_pad_type_0 = const()[name = string("query_states_137_pad_type_0"), val = string("valid")]; + tensor query_states_137_strides_0 = const()[name = string("query_states_137_strides_0"), val = tensor([1, 1])]; + tensor query_states_137_pad_0 = const()[name = string("query_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_137_dilations_0 = const()[name = string("query_states_137_dilations_0"), val = tensor([1, 1])]; + int32 query_states_137_groups_0 = const()[name = string("query_states_137_groups_0"), val = int32(1)]; + tensor query_states_137 = conv(dilations = query_states_137_dilations_0, groups = query_states_137_groups_0, pad = query_states_137_pad_0, pad_type = query_states_137_pad_type_0, strides = query_states_137_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_13497_cast_fp16)[name = string("query_states_137")]; + string key_states_171_pad_type_0 = const()[name = string("key_states_171_pad_type_0"), val = string("valid")]; + tensor key_states_171_strides_0 = const()[name = string("key_states_171_strides_0"), val = tensor([1, 1])]; + tensor key_states_171_pad_0 = const()[name = string("key_states_171_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_171_dilations_0 = const()[name = string("key_states_171_dilations_0"), val = tensor([1, 1])]; + int32 key_states_171_groups_0 = const()[name = string("key_states_171_groups_0"), val = int32(1)]; + tensor key_states_171 = conv(dilations = key_states_171_dilations_0, groups = key_states_171_groups_0, pad = key_states_171_pad_0, pad_type = key_states_171_pad_type_0, strides = key_states_171_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_13497_cast_fp16)[name = string("key_states_171")]; + string value_states_137_pad_type_0 = const()[name = string("value_states_137_pad_type_0"), val = string("valid")]; + tensor value_states_137_strides_0 = const()[name = string("value_states_137_strides_0"), val = tensor([1, 1])]; + tensor value_states_137_pad_0 = const()[name = string("value_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_137_dilations_0 = const()[name = string("value_states_137_dilations_0"), val = tensor([1, 1])]; + int32 value_states_137_groups_0 = const()[name = string("value_states_137_groups_0"), val = int32(1)]; + tensor value_states_137 = conv(dilations = value_states_137_dilations_0, groups = value_states_137_groups_0, pad = value_states_137_pad_0, pad_type = value_states_137_pad_type_0, strides = value_states_137_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_13497_cast_fp16)[name = string("value_states_137")]; + tensor var_13539 = const()[name = string("op_13539"), val = tensor([1, 4, 256, 64])]; + tensor var_13540 = reshape(shape = var_13539, x = query_states_137)[name = string("op_13540")]; + tensor var_13545 = const()[name = string("op_13545"), val = tensor([0, 1, 3, 2])]; + tensor var_13550 = const()[name = string("op_13550"), val = tensor([1, 1, 256, 64])]; + tensor var_13551 = reshape(shape = var_13550, x = key_states_171)[name = string("op_13551")]; + tensor var_13556 = const()[name = string("op_13556"), val = tensor([0, 1, 3, 2])]; + tensor var_13561 = const()[name = string("op_13561"), val = tensor([1, 1, 256, 64])]; + tensor var_13562 = reshape(shape = var_13561, x = value_states_137)[name = string("op_13562")]; + tensor var_13567 = const()[name = string("op_13567"), val = tensor([0, 1, 3, 2])]; + int32 var_13578 = const()[name = string("op_13578"), val = int32(-1)]; + fp16 const_737_promoted = const()[name = string("const_737_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_281 = transpose(perm = var_13545, x = var_13540)[name = string("transpose_79")]; + tensor var_13580 = mul(x = hidden_states_281, y = const_737_promoted)[name = string("op_13580")]; + bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; + tensor input_345 = concat(axis = var_13578, interleave = input_345_interleave_0, values = (hidden_states_281, var_13580))[name = string("input_345")]; + tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; + fp16 var_13575_to_fp16 = const()[name = string("op_13575_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_13575_to_fp16, x = input_345)[name = string("normed_413_cast_fp16")]; + tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_415 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415")]; + tensor var_13594_to_fp16 = const()[name = string("op_13594_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716690816)))]; + tensor q_35_cast_fp16 = mul(x = normed_415, y = var_13594_to_fp16)[name = string("q_35_cast_fp16")]; + int32 var_13605 = const()[name = string("op_13605"), val = int32(-1)]; + fp16 const_741_promoted = const()[name = string("const_741_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_283 = transpose(perm = var_13556, x = var_13551)[name = string("transpose_78")]; + tensor var_13607 = mul(x = hidden_states_283, y = const_741_promoted)[name = string("op_13607")]; + bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; + tensor input_347 = concat(axis = var_13605, interleave = input_347_interleave_0, values = (hidden_states_283, var_13607))[name = string("input_347")]; + tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; + fp16 var_13602_to_fp16 = const()[name = string("op_13602_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_13602_to_fp16, x = input_347)[name = string("normed_417_cast_fp16")]; + tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_419 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419")]; + tensor var_13621_to_fp16 = const()[name = string("op_13621_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716691392)))]; + tensor k_35_cast_fp16 = mul(x = normed_419, y = var_13621_to_fp16)[name = string("k_35_cast_fp16")]; + tensor var_13635_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_35)[name = string("op_13635_cast_fp16")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; + fp16 const_747_promoted_to_fp16 = const()[name = string("const_747_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13656_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_747_promoted_to_fp16)[name = string("op_13656_cast_fp16")]; + int32 var_13658 = const()[name = string("op_13658"), val = int32(-1)]; + bool var_13659_interleave_0 = const()[name = string("op_13659_interleave_0"), val = bool(false)]; + tensor var_13659_cast_fp16 = concat(axis = var_13658, interleave = var_13659_interleave_0, values = (var_13656_cast_fp16, x1_69_cast_fp16))[name = string("op_13659_cast_fp16")]; + tensor var_13660_cast_fp16 = mul(x = var_13659_cast_fp16, y = sin_35)[name = string("op_13660_cast_fp16")]; + tensor query_states_139_cast_fp16 = add(x = var_13635_cast_fp16, y = var_13660_cast_fp16)[name = string("query_states_139_cast_fp16")]; + tensor var_13663_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_35)[name = string("op_13663_cast_fp16")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; + fp16 const_750_promoted_to_fp16 = const()[name = string("const_750_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13684_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_750_promoted_to_fp16)[name = string("op_13684_cast_fp16")]; + int32 var_13686 = const()[name = string("op_13686"), val = int32(-1)]; + bool var_13687_interleave_0 = const()[name = string("op_13687_interleave_0"), val = bool(false)]; + tensor var_13687_cast_fp16 = concat(axis = var_13686, interleave = var_13687_interleave_0, values = (var_13684_cast_fp16, x1_71_cast_fp16))[name = string("op_13687_cast_fp16")]; + tensor var_13688_cast_fp16 = mul(x = var_13687_cast_fp16, y = sin_35)[name = string("op_13688_cast_fp16")]; + tensor key_states_173_cast_fp16 = add(x = var_13663_cast_fp16, y = var_13688_cast_fp16)[name = string("key_states_173_cast_fp16")]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_5_stride_0, update = key_states_173_cast_fp16, x = coreml_update_state_75)[name = string("model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_138_write_state")]; + tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_138")]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_139 = transpose(perm = var_13567, x = var_13562)[name = string("transpose_77")]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_6_stride_0, update = value_states_139, x = coreml_update_state_86)[name = string("model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_139_write_state")]; + tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_139")]; + tensor var_13787_begin_0 = const()[name = string("op_13787_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_13787_end_0 = const()[name = string("op_13787_end_0"), val = tensor([3, 1, 4096, 256])]; + tensor var_13787_end_mask_0 = const()[name = string("op_13787_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13787_cast_fp16 = slice_by_index(begin = var_13787_begin_0, end = var_13787_end_0, end_mask = var_13787_end_mask_0, x = coreml_update_state_87)[name = string("op_13787_cast_fp16")]; + tensor var_13794_begin_0 = const()[name = string("op_13794_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_13794_end_0 = const()[name = string("op_13794_end_0"), val = tensor([7, 1, 4096, 256])]; + tensor var_13794_end_mask_0 = const()[name = string("op_13794_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13794_cast_fp16 = slice_by_index(begin = var_13794_begin_0, end = var_13794_end_0, end_mask = var_13794_end_mask_0, x = coreml_update_state_87)[name = string("op_13794_cast_fp16")]; + tensor var_13833 = const()[name = string("op_13833"), val = tensor([1, 4, 1, 1])]; + tensor x_277_cast_fp16 = tile(reps = var_13833, x = var_13787_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_13853 = const()[name = string("op_13853"), val = tensor([1, 4, 1, 1])]; + tensor x_283_cast_fp16 = tile(reps = var_13853, x = var_13794_cast_fp16)[name = string("x_283_cast_fp16")]; + bool var_13880_transpose_x_0 = const()[name = string("op_13880_transpose_x_0"), val = bool(false)]; + bool var_13880_transpose_y_0 = const()[name = string("op_13880_transpose_y_0"), val = bool(true)]; + tensor var_13880 = matmul(transpose_x = var_13880_transpose_x_0, transpose_y = var_13880_transpose_y_0, x = query_states_139_cast_fp16, y = x_277_cast_fp16)[name = string("op_13880")]; + fp16 var_13881_to_fp16 = const()[name = string("op_13881_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_69_cast_fp16 = mul(x = var_13880, y = var_13881_to_fp16)[name = string("attn_weights_69_cast_fp16")]; + tensor attn_weights_71_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask)[name = string("attn_weights_71_cast_fp16")]; + int32 var_13916 = const()[name = string("op_13916"), val = int32(-1)]; + tensor var_13918_cast_fp16 = softmax(axis = var_13916, x = attn_weights_71_cast_fp16)[name = string("op_13918_cast_fp16")]; + tensor concat_318 = const()[name = string("concat_318"), val = tensor([4, 64, 4096])]; + tensor reshape_51_cast_fp16 = reshape(shape = concat_318, x = var_13918_cast_fp16)[name = string("reshape_51_cast_fp16")]; + tensor concat_319 = const()[name = string("concat_319"), val = tensor([4, 4096, 256])]; + tensor reshape_52_cast_fp16 = reshape(shape = concat_319, x = x_283_cast_fp16)[name = string("reshape_52_cast_fp16")]; + bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; + bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(false)]; + tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = reshape_51_cast_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; + tensor concat_323 = const()[name = string("concat_323"), val = tensor([1, 4, 64, 256])]; + tensor reshape_53_cast_fp16 = reshape(shape = concat_323, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; + tensor var_13930_perm_0 = const()[name = string("op_13930_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13949 = const()[name = string("op_13949"), val = tensor([1, 64, 1024])]; + tensor var_13930_cast_fp16 = transpose(perm = var_13930_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_76")]; + tensor attn_output_175_cast_fp16 = reshape(shape = var_13949, x = var_13930_cast_fp16)[name = string("attn_output_175_cast_fp16")]; + tensor var_13954 = const()[name = string("op_13954"), val = tensor([0, 2, 1])]; + string var_13970_pad_type_0 = const()[name = string("op_13970_pad_type_0"), val = string("valid")]; + int32 var_13970_groups_0 = const()[name = string("op_13970_groups_0"), val = int32(1)]; + tensor var_13970_strides_0 = const()[name = string("op_13970_strides_0"), val = tensor([1])]; + tensor var_13970_pad_0 = const()[name = string("op_13970_pad_0"), val = tensor([0, 0])]; + tensor var_13970_dilations_0 = const()[name = string("op_13970_dilations_0"), val = tensor([1])]; + tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716691968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717576768))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13955_cast_fp16 = transpose(perm = var_13954, x = attn_output_175_cast_fp16)[name = string("transpose_75")]; + tensor var_13970_cast_fp16 = conv(dilations = var_13970_dilations_0, groups = var_13970_groups_0, pad = var_13970_pad_0, pad_type = var_13970_pad_type_0, strides = var_13970_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_13955_cast_fp16)[name = string("op_13970_cast_fp16")]; + tensor var_13974 = const()[name = string("op_13974"), val = tensor([0, 2, 1])]; + int32 var_13985 = const()[name = string("op_13985"), val = int32(-1)]; + fp16 const_762_promoted_to_fp16 = const()[name = string("const_762_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_285_cast_fp16 = transpose(perm = var_13974, x = var_13970_cast_fp16)[name = string("transpose_74")]; + tensor var_13987_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = const_762_promoted_to_fp16)[name = string("op_13987_cast_fp16")]; + bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; + tensor input_351_cast_fp16 = concat(axis = var_13985, interleave = input_351_interleave_0, values = (hidden_states_285_cast_fp16, var_13987_cast_fp16))[name = string("input_351_cast_fp16")]; + tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; + fp16 var_13982_to_fp16 = const()[name = string("op_13982_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_13982_to_fp16, x = input_351_cast_fp16)[name = string("normed_421_cast_fp16")]; + tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_423_cast_fp16 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423_cast_fp16")]; + tensor var_14001_to_fp16 = const()[name = string("op_14001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717613696)))]; + tensor attn_output_179_cast_fp16 = mul(x = normed_423_cast_fp16, y = var_14001_to_fp16)[name = string("attn_output_179_cast_fp16")]; + tensor hidden_states_287_cast_fp16 = add(x = hidden_states_277_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_287_cast_fp16")]; + int32 var_14014 = const()[name = string("op_14014"), val = int32(-1)]; + fp16 const_766_promoted_to_fp16 = const()[name = string("const_766_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14016_cast_fp16 = mul(x = hidden_states_287_cast_fp16, y = const_766_promoted_to_fp16)[name = string("op_14016_cast_fp16")]; + bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; + tensor input_353_cast_fp16 = concat(axis = var_14014, interleave = input_353_interleave_0, values = (hidden_states_287_cast_fp16, var_14016_cast_fp16))[name = string("input_353_cast_fp16")]; + tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; + fp16 var_14011_to_fp16 = const()[name = string("op_14011_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_14011_to_fp16, x = input_353_cast_fp16)[name = string("normed_425_cast_fp16")]; + tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_427_cast_fp16 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427_cast_fp16")]; + tensor var_14030_to_fp16 = const()[name = string("op_14030_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717616064)))]; + tensor x_285_cast_fp16 = mul(x = normed_427_cast_fp16, y = var_14030_to_fp16)[name = string("x_285_cast_fp16")]; + tensor var_14042 = const()[name = string("op_14042"), val = tensor([0, 2, 1])]; + tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; + tensor var_14043_cast_fp16 = transpose(perm = var_14042, x = x_285_cast_fp16)[name = string("transpose_73")]; + tensor input_355_cast_fp16 = expand_dims(axes = input_355_axes_0, x = var_14043_cast_fp16)[name = string("input_355_cast_fp16")]; + string x_287_pad_type_0 = const()[name = string("x_287_pad_type_0"), val = string("valid")]; + tensor x_287_strides_0 = const()[name = string("x_287_strides_0"), val = tensor([1, 1])]; + tensor x_287_pad_0 = const()[name = string("x_287_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_287_dilations_0 = const()[name = string("x_287_dilations_0"), val = tensor([1, 1])]; + int32 x_287_groups_0 = const()[name = string("x_287_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717618432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723590464))))[name = string("model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_287_cast_fp16 = conv(dilations = x_287_dilations_0, groups = x_287_groups_0, pad = x_287_pad_0, pad_type = x_287_pad_type_0, strides = x_287_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("x_287_cast_fp16")]; + string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; + tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; + tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; + int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723811712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729783744))))[name = string("model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_35_cast_fp16 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("b_35_cast_fp16")]; + string var_14068_mode_0 = const()[name = string("op_14068_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_14068_cast_fp16 = gelu(mode = var_14068_mode_0, x = x_287_cast_fp16)[name = string("op_14068_cast_fp16")]; + tensor input_357_cast_fp16 = mul(x = var_14068_cast_fp16, y = b_35_cast_fp16)[name = string("input_357_cast_fp16")]; + string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; + tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; + tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; + int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730004992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735977024))))[name = string("model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_35_cast_fp16 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_357_cast_fp16)[name = string("e_35_cast_fp16")]; + tensor var_14076_axes_0 = const()[name = string("op_14076_axes_0"), val = tensor([2])]; + tensor var_14076_cast_fp16 = squeeze(axes = var_14076_axes_0, x = e_35_cast_fp16)[name = string("op_14076_cast_fp16")]; + tensor var_14077 = const()[name = string("op_14077"), val = tensor([0, 2, 1])]; + int32 var_14088 = const()[name = string("op_14088"), val = int32(-1)]; + fp16 const_770_promoted_to_fp16 = const()[name = string("const_770_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_289_cast_fp16 = transpose(perm = var_14077, x = var_14076_cast_fp16)[name = string("transpose_72")]; + tensor var_14090_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = const_770_promoted_to_fp16)[name = string("op_14090_cast_fp16")]; + bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; + tensor input_359_cast_fp16 = concat(axis = var_14088, interleave = input_359_interleave_0, values = (hidden_states_289_cast_fp16, var_14090_cast_fp16))[name = string("input_359_cast_fp16")]; + tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; + fp16 var_14085_to_fp16 = const()[name = string("op_14085_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_14085_to_fp16, x = input_359_cast_fp16)[name = string("normed_429_cast_fp16")]; + tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; + tensor var_14104_to_fp16 = const()[name = string("op_14104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736013952)))]; + tensor hidden_states_291_cast_fp16 = mul(x = normed_431_cast_fp16, y = var_14104_to_fp16)[name = string("hidden_states_291_cast_fp16")]; + tensor hidden_states_293_cast_fp16 = add(x = hidden_states_287_cast_fp16, y = hidden_states_291_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + int32 var_14158 = const()[name = string("op_14158"), val = int32(-1)]; + fp16 const_775_promoted_to_fp16 = const()[name = string("const_775_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14160_cast_fp16 = mul(x = hidden_states_293_cast_fp16, y = const_775_promoted_to_fp16)[name = string("op_14160_cast_fp16")]; + bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; + tensor input_361_cast_fp16 = concat(axis = var_14158, interleave = input_361_interleave_0, values = (hidden_states_293_cast_fp16, var_14160_cast_fp16))[name = string("input_361_cast_fp16")]; + tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; + fp16 var_14155_to_fp16 = const()[name = string("op_14155_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_14155_to_fp16, x = input_361_cast_fp16)[name = string("normed_433_cast_fp16")]; + tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; + tensor var_14174_to_fp16 = const()[name = string("op_14174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736016320)))]; + tensor hidden_states_295_cast_fp16 = mul(x = normed_435_cast_fp16, y = var_14174_to_fp16)[name = string("hidden_states_295_cast_fp16")]; + tensor var_14185 = const()[name = string("op_14185"), val = tensor([0, 2, 1])]; + tensor var_14188_axes_0 = const()[name = string("op_14188_axes_0"), val = tensor([2])]; + tensor var_14186_cast_fp16 = transpose(perm = var_14185, x = hidden_states_295_cast_fp16)[name = string("transpose_71")]; + tensor var_14188_cast_fp16 = expand_dims(axes = var_14188_axes_0, x = var_14186_cast_fp16)[name = string("op_14188_cast_fp16")]; + string query_states_145_pad_type_0 = const()[name = string("query_states_145_pad_type_0"), val = string("valid")]; + tensor query_states_145_strides_0 = const()[name = string("query_states_145_strides_0"), val = tensor([1, 1])]; + tensor query_states_145_pad_0 = const()[name = string("query_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_145_dilations_0 = const()[name = string("query_states_145_dilations_0"), val = tensor([1, 1])]; + int32 query_states_145_groups_0 = const()[name = string("query_states_145_groups_0"), val = int32(1)]; + tensor query_states_145 = conv(dilations = query_states_145_dilations_0, groups = query_states_145_groups_0, pad = query_states_145_pad_0, pad_type = query_states_145_pad_type_0, strides = query_states_145_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_14188_cast_fp16)[name = string("query_states_145")]; + string key_states_181_pad_type_0 = const()[name = string("key_states_181_pad_type_0"), val = string("valid")]; + tensor key_states_181_strides_0 = const()[name = string("key_states_181_strides_0"), val = tensor([1, 1])]; + tensor key_states_181_pad_0 = const()[name = string("key_states_181_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_181_dilations_0 = const()[name = string("key_states_181_dilations_0"), val = tensor([1, 1])]; + int32 key_states_181_groups_0 = const()[name = string("key_states_181_groups_0"), val = int32(1)]; + tensor key_states_181 = conv(dilations = key_states_181_dilations_0, groups = key_states_181_groups_0, pad = key_states_181_pad_0, pad_type = key_states_181_pad_type_0, strides = key_states_181_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_14188_cast_fp16)[name = string("key_states_181")]; + string value_states_145_pad_type_0 = const()[name = string("value_states_145_pad_type_0"), val = string("valid")]; + tensor value_states_145_strides_0 = const()[name = string("value_states_145_strides_0"), val = tensor([1, 1])]; + tensor value_states_145_pad_0 = const()[name = string("value_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_145_dilations_0 = const()[name = string("value_states_145_dilations_0"), val = tensor([1, 1])]; + int32 value_states_145_groups_0 = const()[name = string("value_states_145_groups_0"), val = int32(1)]; + tensor value_states_145 = conv(dilations = value_states_145_dilations_0, groups = value_states_145_groups_0, pad = value_states_145_pad_0, pad_type = value_states_145_pad_type_0, strides = value_states_145_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_14188_cast_fp16)[name = string("value_states_145")]; + tensor var_14230 = const()[name = string("op_14230"), val = tensor([1, 4, 256, 64])]; + tensor var_14231 = reshape(shape = var_14230, x = query_states_145)[name = string("op_14231")]; + tensor var_14236 = const()[name = string("op_14236"), val = tensor([0, 1, 3, 2])]; + tensor var_14241 = const()[name = string("op_14241"), val = tensor([1, 1, 256, 64])]; + tensor var_14242 = reshape(shape = var_14241, x = key_states_181)[name = string("op_14242")]; + tensor var_14247 = const()[name = string("op_14247"), val = tensor([0, 1, 3, 2])]; + tensor var_14252 = const()[name = string("op_14252"), val = tensor([1, 1, 256, 64])]; + tensor var_14253 = reshape(shape = var_14252, x = value_states_145)[name = string("op_14253")]; + tensor var_14258 = const()[name = string("op_14258"), val = tensor([0, 1, 3, 2])]; + int32 var_14269 = const()[name = string("op_14269"), val = int32(-1)]; + fp16 const_780_promoted = const()[name = string("const_780_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_297 = transpose(perm = var_14236, x = var_14231)[name = string("transpose_70")]; + tensor var_14271 = mul(x = hidden_states_297, y = const_780_promoted)[name = string("op_14271")]; + bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; + tensor input_365 = concat(axis = var_14269, interleave = input_365_interleave_0, values = (hidden_states_297, var_14271))[name = string("input_365")]; + tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; + fp16 var_14266_to_fp16 = const()[name = string("op_14266_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_14266_to_fp16, x = input_365)[name = string("normed_437_cast_fp16")]; + tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; + tensor var_14285_to_fp16 = const()[name = string("op_14285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736018688)))]; + tensor q_37_cast_fp16 = mul(x = normed_439, y = var_14285_to_fp16)[name = string("q_37_cast_fp16")]; + int32 var_14296 = const()[name = string("op_14296"), val = int32(-1)]; + fp16 const_784_promoted = const()[name = string("const_784_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_299 = transpose(perm = var_14247, x = var_14242)[name = string("transpose_69")]; + tensor var_14298 = mul(x = hidden_states_299, y = const_784_promoted)[name = string("op_14298")]; + bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; + tensor input_367 = concat(axis = var_14296, interleave = input_367_interleave_0, values = (hidden_states_299, var_14298))[name = string("input_367")]; + tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; + fp16 var_14293_to_fp16 = const()[name = string("op_14293_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_14293_to_fp16, x = input_367)[name = string("normed_441_cast_fp16")]; + tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; + tensor var_14312_to_fp16 = const()[name = string("op_14312_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736019264)))]; + tensor k_37_cast_fp16 = mul(x = normed_443, y = var_14312_to_fp16)[name = string("k_37_cast_fp16")]; + tensor var_14326_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_5)[name = string("op_14326_cast_fp16")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; + fp16 const_790_promoted_to_fp16 = const()[name = string("const_790_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14347_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_790_promoted_to_fp16)[name = string("op_14347_cast_fp16")]; + int32 var_14349 = const()[name = string("op_14349"), val = int32(-1)]; + bool var_14350_interleave_0 = const()[name = string("op_14350_interleave_0"), val = bool(false)]; + tensor var_14350_cast_fp16 = concat(axis = var_14349, interleave = var_14350_interleave_0, values = (var_14347_cast_fp16, x1_73_cast_fp16))[name = string("op_14350_cast_fp16")]; + tensor var_14351_cast_fp16 = mul(x = var_14350_cast_fp16, y = sin_5)[name = string("op_14351_cast_fp16")]; + tensor query_states_147_cast_fp16 = add(x = var_14326_cast_fp16, y = var_14351_cast_fp16)[name = string("query_states_147_cast_fp16")]; + tensor var_14354_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_5)[name = string("op_14354_cast_fp16")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; + fp16 const_793_promoted_to_fp16 = const()[name = string("const_793_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14375_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_793_promoted_to_fp16)[name = string("op_14375_cast_fp16")]; + int32 var_14377 = const()[name = string("op_14377"), val = int32(-1)]; + bool var_14378_interleave_0 = const()[name = string("op_14378_interleave_0"), val = bool(false)]; + tensor var_14378_cast_fp16 = concat(axis = var_14377, interleave = var_14378_interleave_0, values = (var_14375_cast_fp16, x1_75_cast_fp16))[name = string("op_14378_cast_fp16")]; + tensor var_14379_cast_fp16 = mul(x = var_14378_cast_fp16, y = sin_5)[name = string("op_14379_cast_fp16")]; + tensor key_states_183_cast_fp16 = add(x = var_14354_cast_fp16, y = var_14379_cast_fp16)[name = string("key_states_183_cast_fp16")]; + tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([15])]; + tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; + tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; + tensor expand_dims_220 = const()[name = string("expand_dims_220"), val = tensor([16])]; + int32 concat_326_axis_0 = const()[name = string("concat_326_axis_0"), val = int32(0)]; + bool concat_326_interleave_0 = const()[name = string("concat_326_interleave_0"), val = bool(false)]; + tensor concat_326 = concat(axis = concat_326_axis_0, interleave = concat_326_interleave_0, values = (expand_dims_216, expand_dims_217, current_pos, expand_dims_219))[name = string("concat_326")]; + tensor concat_327_values1_0 = const()[name = string("concat_327_values1_0"), val = tensor([0])]; + tensor concat_327_values3_0 = const()[name = string("concat_327_values3_0"), val = tensor([0])]; + int32 concat_327_axis_0 = const()[name = string("concat_327_axis_0"), val = int32(0)]; + bool concat_327_interleave_0 = const()[name = string("concat_327_interleave_0"), val = bool(false)]; + tensor concat_327 = concat(axis = concat_327_axis_0, interleave = concat_327_interleave_0, values = (expand_dims_220, concat_327_values1_0, end_pos_1, concat_327_values3_0))[name = string("concat_327")]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_326, begin_mask = model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0, end = concat_327, end_mask = model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_31_stride_0, update = key_states_183_cast_fp16, x = coreml_update_state_85)[name = string("model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_140_write_state")]; + tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_140")]; + tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([37])]; + tensor expand_dims_223 = const()[name = string("expand_dims_223"), val = tensor([0])]; + tensor expand_dims_225 = const()[name = string("expand_dims_225"), val = tensor([0])]; + tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([38])]; + int32 concat_330_axis_0 = const()[name = string("concat_330_axis_0"), val = int32(0)]; + bool concat_330_interleave_0 = const()[name = string("concat_330_interleave_0"), val = bool(false)]; + tensor concat_330 = concat(axis = concat_330_axis_0, interleave = concat_330_interleave_0, values = (expand_dims_222, expand_dims_223, current_pos, expand_dims_225))[name = string("concat_330")]; + tensor concat_331_values1_0 = const()[name = string("concat_331_values1_0"), val = tensor([0])]; + tensor concat_331_values3_0 = const()[name = string("concat_331_values3_0"), val = tensor([0])]; + int32 concat_331_axis_0 = const()[name = string("concat_331_axis_0"), val = int32(0)]; + bool concat_331_interleave_0 = const()[name = string("concat_331_interleave_0"), val = bool(false)]; + tensor concat_331 = concat(axis = concat_331_axis_0, interleave = concat_331_interleave_0, values = (expand_dims_226, concat_331_values1_0, end_pos_1, concat_331_values3_0))[name = string("concat_331")]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_147 = transpose(perm = var_14258, x = var_14253)[name = string("transpose_68")]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_330, begin_mask = model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0, end = concat_331, end_mask = model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_32_stride_0, update = value_states_147, x = coreml_update_state_88)[name = string("model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_141_write_state")]; + tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_141")]; + tensor var_14478_begin_0 = const()[name = string("op_14478_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor var_14478_end_0 = const()[name = string("op_14478_end_0"), val = tensor([16, 1, 512, 256])]; + tensor var_14478_end_mask_0 = const()[name = string("op_14478_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14478_cast_fp16 = slice_by_index(begin = var_14478_begin_0, end = var_14478_end_0, end_mask = var_14478_end_mask_0, x = coreml_update_state_89)[name = string("op_14478_cast_fp16")]; + tensor var_14485_begin_0 = const()[name = string("op_14485_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor var_14485_end_0 = const()[name = string("op_14485_end_0"), val = tensor([38, 1, 512, 256])]; + tensor var_14485_end_mask_0 = const()[name = string("op_14485_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14485_cast_fp16 = slice_by_index(begin = var_14485_begin_0, end = var_14485_end_0, end_mask = var_14485_end_mask_0, x = coreml_update_state_89)[name = string("op_14485_cast_fp16")]; + tensor var_14524 = const()[name = string("op_14524"), val = tensor([1, 4, 1, 1])]; + tensor x_293_cast_fp16 = tile(reps = var_14524, x = var_14478_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_14544 = const()[name = string("op_14544"), val = tensor([1, 4, 1, 1])]; + tensor x_299_cast_fp16 = tile(reps = var_14544, x = var_14485_cast_fp16)[name = string("x_299_cast_fp16")]; + bool var_14571_transpose_x_0 = const()[name = string("op_14571_transpose_x_0"), val = bool(false)]; + bool var_14571_transpose_y_0 = const()[name = string("op_14571_transpose_y_0"), val = bool(true)]; + tensor var_14571 = matmul(transpose_x = var_14571_transpose_x_0, transpose_y = var_14571_transpose_y_0, x = query_states_147_cast_fp16, y = x_293_cast_fp16)[name = string("op_14571")]; + fp16 var_14572_to_fp16 = const()[name = string("op_14572_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_73_cast_fp16 = mul(x = var_14571, y = var_14572_to_fp16)[name = string("attn_weights_73_cast_fp16")]; + tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = mask_slice_1)[name = string("attn_weights_75_cast_fp16")]; + int32 var_14607 = const()[name = string("op_14607"), val = int32(-1)]; + tensor var_14609_cast_fp16 = softmax(axis = var_14607, x = attn_weights_75_cast_fp16)[name = string("op_14609_cast_fp16")]; + tensor concat_336 = const()[name = string("concat_336"), val = tensor([4, 64, 512])]; + tensor reshape_54_cast_fp16 = reshape(shape = concat_336, x = var_14609_cast_fp16)[name = string("reshape_54_cast_fp16")]; + tensor concat_337 = const()[name = string("concat_337"), val = tensor([4, 512, 256])]; + tensor reshape_55_cast_fp16 = reshape(shape = concat_337, x = x_299_cast_fp16)[name = string("reshape_55_cast_fp16")]; + bool matmul_18_transpose_x_0 = const()[name = string("matmul_18_transpose_x_0"), val = bool(false)]; + bool matmul_18_transpose_y_0 = const()[name = string("matmul_18_transpose_y_0"), val = bool(false)]; + tensor matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_0, transpose_y = matmul_18_transpose_y_0, x = reshape_54_cast_fp16, y = reshape_55_cast_fp16)[name = string("matmul_18_cast_fp16")]; + tensor concat_341 = const()[name = string("concat_341"), val = tensor([1, 4, 64, 256])]; + tensor reshape_56_cast_fp16 = reshape(shape = concat_341, x = matmul_18_cast_fp16)[name = string("reshape_56_cast_fp16")]; + tensor var_14621_perm_0 = const()[name = string("op_14621_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14640 = const()[name = string("op_14640"), val = tensor([1, 64, 1024])]; + tensor var_14621_cast_fp16 = transpose(perm = var_14621_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_67")]; + tensor attn_output_185_cast_fp16 = reshape(shape = var_14640, x = var_14621_cast_fp16)[name = string("attn_output_185_cast_fp16")]; + tensor var_14645 = const()[name = string("op_14645"), val = tensor([0, 2, 1])]; + string var_14661_pad_type_0 = const()[name = string("op_14661_pad_type_0"), val = string("valid")]; + int32 var_14661_groups_0 = const()[name = string("op_14661_groups_0"), val = int32(1)]; + tensor var_14661_strides_0 = const()[name = string("op_14661_strides_0"), val = tensor([1])]; + tensor var_14661_pad_0 = const()[name = string("op_14661_pad_0"), val = tensor([0, 0])]; + tensor var_14661_dilations_0 = const()[name = string("op_14661_dilations_0"), val = tensor([1])]; + tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736019840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736904640))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14646_cast_fp16 = transpose(perm = var_14645, x = attn_output_185_cast_fp16)[name = string("transpose_66")]; + tensor var_14661_cast_fp16 = conv(dilations = var_14661_dilations_0, groups = var_14661_groups_0, pad = var_14661_pad_0, pad_type = var_14661_pad_type_0, strides = var_14661_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_14646_cast_fp16)[name = string("op_14661_cast_fp16")]; + tensor var_14665 = const()[name = string("op_14665"), val = tensor([0, 2, 1])]; + int32 var_14676 = const()[name = string("op_14676"), val = int32(-1)]; + fp16 const_805_promoted_to_fp16 = const()[name = string("const_805_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_301_cast_fp16 = transpose(perm = var_14665, x = var_14661_cast_fp16)[name = string("transpose_65")]; + tensor var_14678_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = const_805_promoted_to_fp16)[name = string("op_14678_cast_fp16")]; + bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; + tensor input_371_cast_fp16 = concat(axis = var_14676, interleave = input_371_interleave_0, values = (hidden_states_301_cast_fp16, var_14678_cast_fp16))[name = string("input_371_cast_fp16")]; + tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; + fp16 var_14673_to_fp16 = const()[name = string("op_14673_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_14673_to_fp16, x = input_371_cast_fp16)[name = string("normed_445_cast_fp16")]; + tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; + tensor var_14692_to_fp16 = const()[name = string("op_14692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736941568)))]; + tensor attn_output_189_cast_fp16 = mul(x = normed_447_cast_fp16, y = var_14692_to_fp16)[name = string("attn_output_189_cast_fp16")]; + tensor hidden_states_303_cast_fp16 = add(x = hidden_states_293_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; + int32 var_14705 = const()[name = string("op_14705"), val = int32(-1)]; + fp16 const_809_promoted_to_fp16 = const()[name = string("const_809_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14707_cast_fp16 = mul(x = hidden_states_303_cast_fp16, y = const_809_promoted_to_fp16)[name = string("op_14707_cast_fp16")]; + bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; + tensor input_373_cast_fp16 = concat(axis = var_14705, interleave = input_373_interleave_0, values = (hidden_states_303_cast_fp16, var_14707_cast_fp16))[name = string("input_373_cast_fp16")]; + tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; + fp16 var_14702_to_fp16 = const()[name = string("op_14702_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_14702_to_fp16, x = input_373_cast_fp16)[name = string("normed_449_cast_fp16")]; + tensor normed_451_begin_0 = const()[name = string("normed_451_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_451_end_0 = const()[name = string("normed_451_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_451_end_mask_0 = const()[name = string("normed_451_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_451_cast_fp16 = slice_by_index(begin = normed_451_begin_0, end = normed_451_end_0, end_mask = normed_451_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_451_cast_fp16")]; + tensor var_14721_to_fp16 = const()[name = string("op_14721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736943936)))]; + tensor x_301_cast_fp16 = mul(x = normed_451_cast_fp16, y = var_14721_to_fp16)[name = string("x_301_cast_fp16")]; + tensor var_14733 = const()[name = string("op_14733"), val = tensor([0, 2, 1])]; + tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; + tensor var_14734_cast_fp16 = transpose(perm = var_14733, x = x_301_cast_fp16)[name = string("transpose_64")]; + tensor input_375_cast_fp16 = expand_dims(axes = input_375_axes_0, x = var_14734_cast_fp16)[name = string("input_375_cast_fp16")]; + string x_303_pad_type_0 = const()[name = string("x_303_pad_type_0"), val = string("valid")]; + tensor x_303_strides_0 = const()[name = string("x_303_strides_0"), val = tensor([1, 1])]; + tensor x_303_pad_0 = const()[name = string("x_303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_303_dilations_0 = const()[name = string("x_303_dilations_0"), val = tensor([1, 1])]; + int32 x_303_groups_0 = const()[name = string("x_303_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736946304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742918336))))[name = string("model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_303_cast_fp16 = conv(dilations = x_303_dilations_0, groups = x_303_groups_0, pad = x_303_pad_0, pad_type = x_303_pad_type_0, strides = x_303_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("x_303_cast_fp16")]; + string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; + tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; + tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; + int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743139584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749111616))))[name = string("model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_37_cast_fp16 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("b_37_cast_fp16")]; + string var_14759_mode_0 = const()[name = string("op_14759_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_14759_cast_fp16 = gelu(mode = var_14759_mode_0, x = x_303_cast_fp16)[name = string("op_14759_cast_fp16")]; + tensor input_377_cast_fp16 = mul(x = var_14759_cast_fp16, y = b_37_cast_fp16)[name = string("input_377_cast_fp16")]; + string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; + tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; + tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; + int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749332864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755304896))))[name = string("model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_37_cast_fp16 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_377_cast_fp16)[name = string("e_37_cast_fp16")]; + tensor var_14767_axes_0 = const()[name = string("op_14767_axes_0"), val = tensor([2])]; + tensor var_14767_cast_fp16 = squeeze(axes = var_14767_axes_0, x = e_37_cast_fp16)[name = string("op_14767_cast_fp16")]; + tensor var_14768 = const()[name = string("op_14768"), val = tensor([0, 2, 1])]; + int32 var_14779 = const()[name = string("op_14779"), val = int32(-1)]; + fp16 const_813_promoted_to_fp16 = const()[name = string("const_813_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_305_cast_fp16 = transpose(perm = var_14768, x = var_14767_cast_fp16)[name = string("transpose_63")]; + tensor var_14781_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = const_813_promoted_to_fp16)[name = string("op_14781_cast_fp16")]; + bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; + tensor input_379_cast_fp16 = concat(axis = var_14779, interleave = input_379_interleave_0, values = (hidden_states_305_cast_fp16, var_14781_cast_fp16))[name = string("input_379_cast_fp16")]; + tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; + fp16 var_14776_to_fp16 = const()[name = string("op_14776_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_14776_to_fp16, x = input_379_cast_fp16)[name = string("normed_453_cast_fp16")]; + tensor normed_455_begin_0 = const()[name = string("normed_455_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_455_end_0 = const()[name = string("normed_455_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_455_end_mask_0 = const()[name = string("normed_455_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_455_cast_fp16 = slice_by_index(begin = normed_455_begin_0, end = normed_455_end_0, end_mask = normed_455_end_mask_0, x = normed_453_cast_fp16)[name = string("normed_455_cast_fp16")]; + tensor var_14795_to_fp16 = const()[name = string("op_14795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755341824)))]; + tensor hidden_states_307_cast_fp16 = mul(x = normed_455_cast_fp16, y = var_14795_to_fp16)[name = string("hidden_states_307_cast_fp16")]; + tensor hidden_states_309_cast_fp16 = add(x = hidden_states_303_cast_fp16, y = hidden_states_307_cast_fp16)[name = string("hidden_states_309_cast_fp16")]; + int32 var_14849 = const()[name = string("op_14849"), val = int32(-1)]; + fp16 const_818_promoted_to_fp16 = const()[name = string("const_818_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14851_cast_fp16 = mul(x = hidden_states_309_cast_fp16, y = const_818_promoted_to_fp16)[name = string("op_14851_cast_fp16")]; + bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; + tensor input_381_cast_fp16 = concat(axis = var_14849, interleave = input_381_interleave_0, values = (hidden_states_309_cast_fp16, var_14851_cast_fp16))[name = string("input_381_cast_fp16")]; + tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; + fp16 var_14846_to_fp16 = const()[name = string("op_14846_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_14846_to_fp16, x = input_381_cast_fp16)[name = string("normed_457_cast_fp16")]; + tensor normed_459_begin_0 = const()[name = string("normed_459_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_459_end_0 = const()[name = string("normed_459_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_459_end_mask_0 = const()[name = string("normed_459_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_459_cast_fp16 = slice_by_index(begin = normed_459_begin_0, end = normed_459_end_0, end_mask = normed_459_end_mask_0, x = normed_457_cast_fp16)[name = string("normed_459_cast_fp16")]; + tensor var_14865_to_fp16 = const()[name = string("op_14865_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755344192)))]; + tensor hidden_states_311_cast_fp16 = mul(x = normed_459_cast_fp16, y = var_14865_to_fp16)[name = string("hidden_states_311_cast_fp16")]; + tensor var_14876 = const()[name = string("op_14876"), val = tensor([0, 2, 1])]; + tensor var_14879_axes_0 = const()[name = string("op_14879_axes_0"), val = tensor([2])]; + tensor var_14877_cast_fp16 = transpose(perm = var_14876, x = hidden_states_311_cast_fp16)[name = string("transpose_62")]; + tensor var_14879_cast_fp16 = expand_dims(axes = var_14879_axes_0, x = var_14877_cast_fp16)[name = string("op_14879_cast_fp16")]; + string query_states_153_pad_type_0 = const()[name = string("query_states_153_pad_type_0"), val = string("valid")]; + tensor query_states_153_strides_0 = const()[name = string("query_states_153_strides_0"), val = tensor([1, 1])]; + tensor query_states_153_pad_0 = const()[name = string("query_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_153_dilations_0 = const()[name = string("query_states_153_dilations_0"), val = tensor([1, 1])]; + int32 query_states_153_groups_0 = const()[name = string("query_states_153_groups_0"), val = int32(1)]; + tensor query_states_153 = conv(dilations = query_states_153_dilations_0, groups = query_states_153_groups_0, pad = query_states_153_pad_0, pad_type = query_states_153_pad_type_0, strides = query_states_153_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_14879_cast_fp16)[name = string("query_states_153")]; + string key_states_191_pad_type_0 = const()[name = string("key_states_191_pad_type_0"), val = string("valid")]; + tensor key_states_191_strides_0 = const()[name = string("key_states_191_strides_0"), val = tensor([1, 1])]; + tensor key_states_191_pad_0 = const()[name = string("key_states_191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_191_dilations_0 = const()[name = string("key_states_191_dilations_0"), val = tensor([1, 1])]; + int32 key_states_191_groups_0 = const()[name = string("key_states_191_groups_0"), val = int32(1)]; + tensor key_states_191 = conv(dilations = key_states_191_dilations_0, groups = key_states_191_groups_0, pad = key_states_191_pad_0, pad_type = key_states_191_pad_type_0, strides = key_states_191_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_14879_cast_fp16)[name = string("key_states_191")]; + string value_states_153_pad_type_0 = const()[name = string("value_states_153_pad_type_0"), val = string("valid")]; + tensor value_states_153_strides_0 = const()[name = string("value_states_153_strides_0"), val = tensor([1, 1])]; + tensor value_states_153_pad_0 = const()[name = string("value_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_153_dilations_0 = const()[name = string("value_states_153_dilations_0"), val = tensor([1, 1])]; + int32 value_states_153_groups_0 = const()[name = string("value_states_153_groups_0"), val = int32(1)]; + tensor value_states_153 = conv(dilations = value_states_153_dilations_0, groups = value_states_153_groups_0, pad = value_states_153_pad_0, pad_type = value_states_153_pad_type_0, strides = value_states_153_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_14879_cast_fp16)[name = string("value_states_153")]; + tensor var_14921 = const()[name = string("op_14921"), val = tensor([1, 4, 256, 64])]; + tensor var_14922 = reshape(shape = var_14921, x = query_states_153)[name = string("op_14922")]; + tensor var_14927 = const()[name = string("op_14927"), val = tensor([0, 1, 3, 2])]; + tensor var_14932 = const()[name = string("op_14932"), val = tensor([1, 1, 256, 64])]; + tensor var_14933 = reshape(shape = var_14932, x = key_states_191)[name = string("op_14933")]; + tensor var_14938 = const()[name = string("op_14938"), val = tensor([0, 1, 3, 2])]; + tensor var_14943 = const()[name = string("op_14943"), val = tensor([1, 1, 256, 64])]; + tensor var_14944 = reshape(shape = var_14943, x = value_states_153)[name = string("op_14944")]; + tensor var_14949 = const()[name = string("op_14949"), val = tensor([0, 1, 3, 2])]; + int32 var_14960 = const()[name = string("op_14960"), val = int32(-1)]; + fp16 const_823_promoted = const()[name = string("const_823_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_313 = transpose(perm = var_14927, x = var_14922)[name = string("transpose_61")]; + tensor var_14962 = mul(x = hidden_states_313, y = const_823_promoted)[name = string("op_14962")]; + bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; + tensor input_385 = concat(axis = var_14960, interleave = input_385_interleave_0, values = (hidden_states_313, var_14962))[name = string("input_385")]; + tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; + fp16 var_14957_to_fp16 = const()[name = string("op_14957_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_14957_to_fp16, x = input_385)[name = string("normed_461_cast_fp16")]; + tensor normed_463_begin_0 = const()[name = string("normed_463_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_463_end_0 = const()[name = string("normed_463_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_463_end_mask_0 = const()[name = string("normed_463_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_463 = slice_by_index(begin = normed_463_begin_0, end = normed_463_end_0, end_mask = normed_463_end_mask_0, x = normed_461_cast_fp16)[name = string("normed_463")]; + tensor var_14976_to_fp16 = const()[name = string("op_14976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755346560)))]; + tensor q_39_cast_fp16 = mul(x = normed_463, y = var_14976_to_fp16)[name = string("q_39_cast_fp16")]; + int32 var_14987 = const()[name = string("op_14987"), val = int32(-1)]; + fp16 const_827_promoted = const()[name = string("const_827_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_315 = transpose(perm = var_14938, x = var_14933)[name = string("transpose_60")]; + tensor var_14989 = mul(x = hidden_states_315, y = const_827_promoted)[name = string("op_14989")]; + bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; + tensor input_387 = concat(axis = var_14987, interleave = input_387_interleave_0, values = (hidden_states_315, var_14989))[name = string("input_387")]; + tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; + fp16 var_14984_to_fp16 = const()[name = string("op_14984_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_14984_to_fp16, x = input_387)[name = string("normed_465_cast_fp16")]; + tensor normed_467_begin_0 = const()[name = string("normed_467_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_467_end_0 = const()[name = string("normed_467_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_467_end_mask_0 = const()[name = string("normed_467_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_467 = slice_by_index(begin = normed_467_begin_0, end = normed_467_end_0, end_mask = normed_467_end_mask_0, x = normed_465_cast_fp16)[name = string("normed_467")]; + tensor var_15003_to_fp16 = const()[name = string("op_15003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755347136)))]; + tensor k_39_cast_fp16 = mul(x = normed_467, y = var_15003_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_15017_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_5)[name = string("op_15017_cast_fp16")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; + fp16 const_833_promoted_to_fp16 = const()[name = string("const_833_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15038_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_833_promoted_to_fp16)[name = string("op_15038_cast_fp16")]; + int32 var_15040 = const()[name = string("op_15040"), val = int32(-1)]; + bool var_15041_interleave_0 = const()[name = string("op_15041_interleave_0"), val = bool(false)]; + tensor var_15041_cast_fp16 = concat(axis = var_15040, interleave = var_15041_interleave_0, values = (var_15038_cast_fp16, x1_77_cast_fp16))[name = string("op_15041_cast_fp16")]; + tensor var_15042_cast_fp16 = mul(x = var_15041_cast_fp16, y = sin_5)[name = string("op_15042_cast_fp16")]; + tensor query_states_155_cast_fp16 = add(x = var_15017_cast_fp16, y = var_15042_cast_fp16)[name = string("query_states_155_cast_fp16")]; + tensor var_15045_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_5)[name = string("op_15045_cast_fp16")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; + fp16 const_836_promoted_to_fp16 = const()[name = string("const_836_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15066_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_836_promoted_to_fp16)[name = string("op_15066_cast_fp16")]; + int32 var_15068 = const()[name = string("op_15068"), val = int32(-1)]; + bool var_15069_interleave_0 = const()[name = string("op_15069_interleave_0"), val = bool(false)]; + tensor var_15069_cast_fp16 = concat(axis = var_15068, interleave = var_15069_interleave_0, values = (var_15066_cast_fp16, x1_79_cast_fp16))[name = string("op_15069_cast_fp16")]; + tensor var_15070_cast_fp16 = mul(x = var_15069_cast_fp16, y = sin_5)[name = string("op_15070_cast_fp16")]; + tensor key_states_193_cast_fp16 = add(x = var_15045_cast_fp16, y = var_15070_cast_fp16)[name = string("key_states_193_cast_fp16")]; + tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([16])]; + tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; + tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; + tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([17])]; + int32 concat_344_axis_0 = const()[name = string("concat_344_axis_0"), val = int32(0)]; + bool concat_344_interleave_0 = const()[name = string("concat_344_interleave_0"), val = bool(false)]; + tensor concat_344 = concat(axis = concat_344_axis_0, interleave = concat_344_interleave_0, values = (expand_dims_228, expand_dims_229, current_pos, expand_dims_231))[name = string("concat_344")]; + tensor concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = tensor([0])]; + tensor concat_345_values3_0 = const()[name = string("concat_345_values3_0"), val = tensor([0])]; + int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; + bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; + tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (expand_dims_232, concat_345_values1_0, end_pos_1, concat_345_values3_0))[name = string("concat_345")]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_344, begin_mask = model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0, end = concat_345, end_mask = model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_33_stride_0, update = key_states_193_cast_fp16, x = coreml_update_state_89)[name = string("model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_142_write_state")]; + tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_142")]; + tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([38])]; + tensor expand_dims_235 = const()[name = string("expand_dims_235"), val = tensor([0])]; + tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; + tensor expand_dims_238 = const()[name = string("expand_dims_238"), val = tensor([39])]; + int32 concat_348_axis_0 = const()[name = string("concat_348_axis_0"), val = int32(0)]; + bool concat_348_interleave_0 = const()[name = string("concat_348_interleave_0"), val = bool(false)]; + tensor concat_348 = concat(axis = concat_348_axis_0, interleave = concat_348_interleave_0, values = (expand_dims_234, expand_dims_235, current_pos, expand_dims_237))[name = string("concat_348")]; + tensor concat_349_values1_0 = const()[name = string("concat_349_values1_0"), val = tensor([0])]; + tensor concat_349_values3_0 = const()[name = string("concat_349_values3_0"), val = tensor([0])]; + int32 concat_349_axis_0 = const()[name = string("concat_349_axis_0"), val = int32(0)]; + bool concat_349_interleave_0 = const()[name = string("concat_349_interleave_0"), val = bool(false)]; + tensor concat_349 = concat(axis = concat_349_axis_0, interleave = concat_349_interleave_0, values = (expand_dims_238, concat_349_values1_0, end_pos_1, concat_349_values3_0))[name = string("concat_349")]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_155 = transpose(perm = var_14949, x = var_14944)[name = string("transpose_59")]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_348, begin_mask = model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0, end = concat_349, end_mask = model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_34_stride_0, update = value_states_155, x = coreml_update_state_90)[name = string("model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_143_write_state")]; + tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_143")]; + tensor var_15169_begin_0 = const()[name = string("op_15169_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_15169_end_0 = const()[name = string("op_15169_end_0"), val = tensor([17, 1, 512, 256])]; + tensor var_15169_end_mask_0 = const()[name = string("op_15169_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15169_cast_fp16 = slice_by_index(begin = var_15169_begin_0, end = var_15169_end_0, end_mask = var_15169_end_mask_0, x = coreml_update_state_91)[name = string("op_15169_cast_fp16")]; + tensor var_15176_begin_0 = const()[name = string("op_15176_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor var_15176_end_0 = const()[name = string("op_15176_end_0"), val = tensor([39, 1, 512, 256])]; + tensor var_15176_end_mask_0 = const()[name = string("op_15176_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15176_cast_fp16 = slice_by_index(begin = var_15176_begin_0, end = var_15176_end_0, end_mask = var_15176_end_mask_0, x = coreml_update_state_91)[name = string("op_15176_cast_fp16")]; + tensor var_15215 = const()[name = string("op_15215"), val = tensor([1, 4, 1, 1])]; + tensor x_309_cast_fp16 = tile(reps = var_15215, x = var_15169_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor var_15235 = const()[name = string("op_15235"), val = tensor([1, 4, 1, 1])]; + tensor x_315_cast_fp16 = tile(reps = var_15235, x = var_15176_cast_fp16)[name = string("x_315_cast_fp16")]; + bool var_15262_transpose_x_0 = const()[name = string("op_15262_transpose_x_0"), val = bool(false)]; + bool var_15262_transpose_y_0 = const()[name = string("op_15262_transpose_y_0"), val = bool(true)]; + tensor var_15262 = matmul(transpose_x = var_15262_transpose_x_0, transpose_y = var_15262_transpose_y_0, x = query_states_155_cast_fp16, y = x_309_cast_fp16)[name = string("op_15262")]; + fp16 var_15263_to_fp16 = const()[name = string("op_15263_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_77_cast_fp16 = mul(x = var_15262, y = var_15263_to_fp16)[name = string("attn_weights_77_cast_fp16")]; + tensor attn_weights_79_cast_fp16 = add(x = attn_weights_77_cast_fp16, y = mask_slice_1)[name = string("attn_weights_79_cast_fp16")]; + int32 var_15298 = const()[name = string("op_15298"), val = int32(-1)]; + tensor var_15300_cast_fp16 = softmax(axis = var_15298, x = attn_weights_79_cast_fp16)[name = string("op_15300_cast_fp16")]; + tensor concat_354 = const()[name = string("concat_354"), val = tensor([4, 64, 512])]; + tensor reshape_57_cast_fp16 = reshape(shape = concat_354, x = var_15300_cast_fp16)[name = string("reshape_57_cast_fp16")]; + tensor concat_355 = const()[name = string("concat_355"), val = tensor([4, 512, 256])]; + tensor reshape_58_cast_fp16 = reshape(shape = concat_355, x = x_315_cast_fp16)[name = string("reshape_58_cast_fp16")]; + bool matmul_19_transpose_x_0 = const()[name = string("matmul_19_transpose_x_0"), val = bool(false)]; + bool matmul_19_transpose_y_0 = const()[name = string("matmul_19_transpose_y_0"), val = bool(false)]; + tensor matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_0, transpose_y = matmul_19_transpose_y_0, x = reshape_57_cast_fp16, y = reshape_58_cast_fp16)[name = string("matmul_19_cast_fp16")]; + tensor concat_359 = const()[name = string("concat_359"), val = tensor([1, 4, 64, 256])]; + tensor reshape_59_cast_fp16 = reshape(shape = concat_359, x = matmul_19_cast_fp16)[name = string("reshape_59_cast_fp16")]; + tensor var_15312_perm_0 = const()[name = string("op_15312_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_15331 = const()[name = string("op_15331"), val = tensor([1, 64, 1024])]; + tensor var_15312_cast_fp16 = transpose(perm = var_15312_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_58")]; + tensor attn_output_195_cast_fp16 = reshape(shape = var_15331, x = var_15312_cast_fp16)[name = string("attn_output_195_cast_fp16")]; + tensor var_15336 = const()[name = string("op_15336"), val = tensor([0, 2, 1])]; + string var_15352_pad_type_0 = const()[name = string("op_15352_pad_type_0"), val = string("valid")]; + int32 var_15352_groups_0 = const()[name = string("op_15352_groups_0"), val = int32(1)]; + tensor var_15352_strides_0 = const()[name = string("op_15352_strides_0"), val = tensor([1])]; + tensor var_15352_pad_0 = const()[name = string("op_15352_pad_0"), val = tensor([0, 0])]; + tensor var_15352_dilations_0 = const()[name = string("op_15352_dilations_0"), val = tensor([1])]; + tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755347712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756232512))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_15337_cast_fp16 = transpose(perm = var_15336, x = attn_output_195_cast_fp16)[name = string("transpose_57")]; + tensor var_15352_cast_fp16 = conv(dilations = var_15352_dilations_0, groups = var_15352_groups_0, pad = var_15352_pad_0, pad_type = var_15352_pad_type_0, strides = var_15352_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_15337_cast_fp16)[name = string("op_15352_cast_fp16")]; + tensor var_15356 = const()[name = string("op_15356"), val = tensor([0, 2, 1])]; + int32 var_15367 = const()[name = string("op_15367"), val = int32(-1)]; + fp16 const_848_promoted_to_fp16 = const()[name = string("const_848_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_317_cast_fp16 = transpose(perm = var_15356, x = var_15352_cast_fp16)[name = string("transpose_56")]; + tensor var_15369_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = const_848_promoted_to_fp16)[name = string("op_15369_cast_fp16")]; + bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; + tensor input_391_cast_fp16 = concat(axis = var_15367, interleave = input_391_interleave_0, values = (hidden_states_317_cast_fp16, var_15369_cast_fp16))[name = string("input_391_cast_fp16")]; + tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; + fp16 var_15364_to_fp16 = const()[name = string("op_15364_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_15364_to_fp16, x = input_391_cast_fp16)[name = string("normed_469_cast_fp16")]; + tensor normed_471_begin_0 = const()[name = string("normed_471_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_471_end_0 = const()[name = string("normed_471_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_471_end_mask_0 = const()[name = string("normed_471_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_471_cast_fp16 = slice_by_index(begin = normed_471_begin_0, end = normed_471_end_0, end_mask = normed_471_end_mask_0, x = normed_469_cast_fp16)[name = string("normed_471_cast_fp16")]; + tensor var_15383_to_fp16 = const()[name = string("op_15383_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756269440)))]; + tensor attn_output_199_cast_fp16 = mul(x = normed_471_cast_fp16, y = var_15383_to_fp16)[name = string("attn_output_199_cast_fp16")]; + tensor hidden_states_319_cast_fp16 = add(x = hidden_states_309_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; + int32 var_15396 = const()[name = string("op_15396"), val = int32(-1)]; + fp16 const_852_promoted_to_fp16 = const()[name = string("const_852_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15398_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = const_852_promoted_to_fp16)[name = string("op_15398_cast_fp16")]; + bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; + tensor input_393_cast_fp16 = concat(axis = var_15396, interleave = input_393_interleave_0, values = (hidden_states_319_cast_fp16, var_15398_cast_fp16))[name = string("input_393_cast_fp16")]; + tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; + fp16 var_15393_to_fp16 = const()[name = string("op_15393_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_15393_to_fp16, x = input_393_cast_fp16)[name = string("normed_473_cast_fp16")]; + tensor normed_475_begin_0 = const()[name = string("normed_475_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_475_end_0 = const()[name = string("normed_475_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_475_end_mask_0 = const()[name = string("normed_475_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_475_cast_fp16 = slice_by_index(begin = normed_475_begin_0, end = normed_475_end_0, end_mask = normed_475_end_mask_0, x = normed_473_cast_fp16)[name = string("normed_475_cast_fp16")]; + tensor var_15412_to_fp16 = const()[name = string("op_15412_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756271808)))]; + tensor x_317_cast_fp16 = mul(x = normed_475_cast_fp16, y = var_15412_to_fp16)[name = string("x_317_cast_fp16")]; + tensor var_15424 = const()[name = string("op_15424"), val = tensor([0, 2, 1])]; + tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; + tensor var_15425_cast_fp16 = transpose(perm = var_15424, x = x_317_cast_fp16)[name = string("transpose_55")]; + tensor input_395_cast_fp16 = expand_dims(axes = input_395_axes_0, x = var_15425_cast_fp16)[name = string("input_395_cast_fp16")]; + string x_319_pad_type_0 = const()[name = string("x_319_pad_type_0"), val = string("valid")]; + tensor x_319_strides_0 = const()[name = string("x_319_strides_0"), val = tensor([1, 1])]; + tensor x_319_pad_0 = const()[name = string("x_319_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_319_dilations_0 = const()[name = string("x_319_dilations_0"), val = tensor([1, 1])]; + int32 x_319_groups_0 = const()[name = string("x_319_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756274176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(762246208))))[name = string("model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_319_cast_fp16 = conv(dilations = x_319_dilations_0, groups = x_319_groups_0, pad = x_319_pad_0, pad_type = x_319_pad_type_0, strides = x_319_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("x_319_cast_fp16")]; + string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; + tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; + tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; + int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(762467456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768439488))))[name = string("model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_39_cast_fp16 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("b_39_cast_fp16")]; + string var_15450_mode_0 = const()[name = string("op_15450_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_15450_cast_fp16 = gelu(mode = var_15450_mode_0, x = x_319_cast_fp16)[name = string("op_15450_cast_fp16")]; + tensor input_397_cast_fp16 = mul(x = var_15450_cast_fp16, y = b_39_cast_fp16)[name = string("input_397_cast_fp16")]; + string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; + tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; + tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; + int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768660736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774632768))))[name = string("model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_39_cast_fp16 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("e_39_cast_fp16")]; + tensor var_15458_axes_0 = const()[name = string("op_15458_axes_0"), val = tensor([2])]; + tensor var_15458_cast_fp16 = squeeze(axes = var_15458_axes_0, x = e_39_cast_fp16)[name = string("op_15458_cast_fp16")]; + tensor var_15459 = const()[name = string("op_15459"), val = tensor([0, 2, 1])]; + int32 var_15470 = const()[name = string("op_15470"), val = int32(-1)]; + fp16 const_856_promoted_to_fp16 = const()[name = string("const_856_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_321_cast_fp16 = transpose(perm = var_15459, x = var_15458_cast_fp16)[name = string("transpose_54")]; + tensor var_15472_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = const_856_promoted_to_fp16)[name = string("op_15472_cast_fp16")]; + bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; + tensor input_399_cast_fp16 = concat(axis = var_15470, interleave = input_399_interleave_0, values = (hidden_states_321_cast_fp16, var_15472_cast_fp16))[name = string("input_399_cast_fp16")]; + tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; + fp16 var_15467_to_fp16 = const()[name = string("op_15467_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_15467_to_fp16, x = input_399_cast_fp16)[name = string("normed_477_cast_fp16")]; + tensor normed_479_begin_0 = const()[name = string("normed_479_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_479_end_0 = const()[name = string("normed_479_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_479_end_mask_0 = const()[name = string("normed_479_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_479_cast_fp16 = slice_by_index(begin = normed_479_begin_0, end = normed_479_end_0, end_mask = normed_479_end_mask_0, x = normed_477_cast_fp16)[name = string("normed_479_cast_fp16")]; + tensor var_15486_to_fp16 = const()[name = string("op_15486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774669696)))]; + tensor hidden_states_323_cast_fp16 = mul(x = normed_479_cast_fp16, y = var_15486_to_fp16)[name = string("hidden_states_323_cast_fp16")]; + tensor hidden_states_325_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = hidden_states_323_cast_fp16)[name = string("hidden_states_325_cast_fp16")]; + int32 var_15540 = const()[name = string("op_15540"), val = int32(-1)]; + fp16 const_861_promoted_to_fp16 = const()[name = string("const_861_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15542_cast_fp16 = mul(x = hidden_states_325_cast_fp16, y = const_861_promoted_to_fp16)[name = string("op_15542_cast_fp16")]; + bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; + tensor input_401_cast_fp16 = concat(axis = var_15540, interleave = input_401_interleave_0, values = (hidden_states_325_cast_fp16, var_15542_cast_fp16))[name = string("input_401_cast_fp16")]; + tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; + fp16 var_15537_to_fp16 = const()[name = string("op_15537_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_15537_to_fp16, x = input_401_cast_fp16)[name = string("normed_481_cast_fp16")]; + tensor normed_483_begin_0 = const()[name = string("normed_483_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_483_end_0 = const()[name = string("normed_483_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_483_end_mask_0 = const()[name = string("normed_483_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_483_cast_fp16 = slice_by_index(begin = normed_483_begin_0, end = normed_483_end_0, end_mask = normed_483_end_mask_0, x = normed_481_cast_fp16)[name = string("normed_483_cast_fp16")]; + tensor var_15556_to_fp16 = const()[name = string("op_15556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774672064)))]; + tensor hidden_states_327_cast_fp16 = mul(x = normed_483_cast_fp16, y = var_15556_to_fp16)[name = string("hidden_states_327_cast_fp16")]; + tensor var_15567 = const()[name = string("op_15567"), val = tensor([0, 2, 1])]; + tensor var_15570_axes_0 = const()[name = string("op_15570_axes_0"), val = tensor([2])]; + tensor var_15568_cast_fp16 = transpose(perm = var_15567, x = hidden_states_327_cast_fp16)[name = string("transpose_53")]; + tensor var_15570_cast_fp16 = expand_dims(axes = var_15570_axes_0, x = var_15568_cast_fp16)[name = string("op_15570_cast_fp16")]; + string query_states_161_pad_type_0 = const()[name = string("query_states_161_pad_type_0"), val = string("valid")]; + tensor query_states_161_strides_0 = const()[name = string("query_states_161_strides_0"), val = tensor([1, 1])]; + tensor query_states_161_pad_0 = const()[name = string("query_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_161_dilations_0 = const()[name = string("query_states_161_dilations_0"), val = tensor([1, 1])]; + int32 query_states_161_groups_0 = const()[name = string("query_states_161_groups_0"), val = int32(1)]; + tensor query_states_161 = conv(dilations = query_states_161_dilations_0, groups = query_states_161_groups_0, pad = query_states_161_pad_0, pad_type = query_states_161_pad_type_0, strides = query_states_161_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_15570_cast_fp16)[name = string("query_states_161")]; + string key_states_201_pad_type_0 = const()[name = string("key_states_201_pad_type_0"), val = string("valid")]; + tensor key_states_201_strides_0 = const()[name = string("key_states_201_strides_0"), val = tensor([1, 1])]; + tensor key_states_201_pad_0 = const()[name = string("key_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_201_dilations_0 = const()[name = string("key_states_201_dilations_0"), val = tensor([1, 1])]; + int32 key_states_201_groups_0 = const()[name = string("key_states_201_groups_0"), val = int32(1)]; + tensor key_states_201 = conv(dilations = key_states_201_dilations_0, groups = key_states_201_groups_0, pad = key_states_201_pad_0, pad_type = key_states_201_pad_type_0, strides = key_states_201_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_15570_cast_fp16)[name = string("key_states_201")]; + string value_states_161_pad_type_0 = const()[name = string("value_states_161_pad_type_0"), val = string("valid")]; + tensor value_states_161_strides_0 = const()[name = string("value_states_161_strides_0"), val = tensor([1, 1])]; + tensor value_states_161_pad_0 = const()[name = string("value_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_161_dilations_0 = const()[name = string("value_states_161_dilations_0"), val = tensor([1, 1])]; + int32 value_states_161_groups_0 = const()[name = string("value_states_161_groups_0"), val = int32(1)]; + tensor value_states_161 = conv(dilations = value_states_161_dilations_0, groups = value_states_161_groups_0, pad = value_states_161_pad_0, pad_type = value_states_161_pad_type_0, strides = value_states_161_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_15570_cast_fp16)[name = string("value_states_161")]; + tensor var_15612 = const()[name = string("op_15612"), val = tensor([1, 4, 256, 64])]; + tensor var_15613 = reshape(shape = var_15612, x = query_states_161)[name = string("op_15613")]; + tensor var_15618 = const()[name = string("op_15618"), val = tensor([0, 1, 3, 2])]; + tensor var_15623 = const()[name = string("op_15623"), val = tensor([1, 1, 256, 64])]; + tensor var_15624 = reshape(shape = var_15623, x = key_states_201)[name = string("op_15624")]; + tensor var_15629 = const()[name = string("op_15629"), val = tensor([0, 1, 3, 2])]; + tensor var_15634 = const()[name = string("op_15634"), val = tensor([1, 1, 256, 64])]; + tensor var_15635 = reshape(shape = var_15634, x = value_states_161)[name = string("op_15635")]; + tensor var_15640 = const()[name = string("op_15640"), val = tensor([0, 1, 3, 2])]; + int32 var_15651 = const()[name = string("op_15651"), val = int32(-1)]; + fp16 const_866_promoted = const()[name = string("const_866_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_329 = transpose(perm = var_15618, x = var_15613)[name = string("transpose_52")]; + tensor var_15653 = mul(x = hidden_states_329, y = const_866_promoted)[name = string("op_15653")]; + bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; + tensor input_405 = concat(axis = var_15651, interleave = input_405_interleave_0, values = (hidden_states_329, var_15653))[name = string("input_405")]; + tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; + fp16 var_15648_to_fp16 = const()[name = string("op_15648_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_15648_to_fp16, x = input_405)[name = string("normed_485_cast_fp16")]; + tensor normed_487_begin_0 = const()[name = string("normed_487_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_487_end_0 = const()[name = string("normed_487_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_487_end_mask_0 = const()[name = string("normed_487_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_487 = slice_by_index(begin = normed_487_begin_0, end = normed_487_end_0, end_mask = normed_487_end_mask_0, x = normed_485_cast_fp16)[name = string("normed_487")]; + tensor var_15667_to_fp16 = const()[name = string("op_15667_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774674432)))]; + tensor q_41_cast_fp16 = mul(x = normed_487, y = var_15667_to_fp16)[name = string("q_41_cast_fp16")]; + int32 var_15678 = const()[name = string("op_15678"), val = int32(-1)]; + fp16 const_870_promoted = const()[name = string("const_870_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_331 = transpose(perm = var_15629, x = var_15624)[name = string("transpose_51")]; + tensor var_15680 = mul(x = hidden_states_331, y = const_870_promoted)[name = string("op_15680")]; + bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; + tensor input_407 = concat(axis = var_15678, interleave = input_407_interleave_0, values = (hidden_states_331, var_15680))[name = string("input_407")]; + tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; + fp16 var_15675_to_fp16 = const()[name = string("op_15675_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_15675_to_fp16, x = input_407)[name = string("normed_489_cast_fp16")]; + tensor normed_491_begin_0 = const()[name = string("normed_491_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_491_end_0 = const()[name = string("normed_491_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_491_end_mask_0 = const()[name = string("normed_491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_491 = slice_by_index(begin = normed_491_begin_0, end = normed_491_end_0, end_mask = normed_491_end_mask_0, x = normed_489_cast_fp16)[name = string("normed_491")]; + tensor var_15694_to_fp16 = const()[name = string("op_15694_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774675008)))]; + tensor k_41_cast_fp16 = mul(x = normed_491, y = var_15694_to_fp16)[name = string("k_41_cast_fp16")]; + tensor var_15708_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_5)[name = string("op_15708_cast_fp16")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; + fp16 const_876_promoted_to_fp16 = const()[name = string("const_876_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15729_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_876_promoted_to_fp16)[name = string("op_15729_cast_fp16")]; + int32 var_15731 = const()[name = string("op_15731"), val = int32(-1)]; + bool var_15732_interleave_0 = const()[name = string("op_15732_interleave_0"), val = bool(false)]; + tensor var_15732_cast_fp16 = concat(axis = var_15731, interleave = var_15732_interleave_0, values = (var_15729_cast_fp16, x1_81_cast_fp16))[name = string("op_15732_cast_fp16")]; + tensor var_15733_cast_fp16 = mul(x = var_15732_cast_fp16, y = sin_5)[name = string("op_15733_cast_fp16")]; + tensor query_states_163_cast_fp16 = add(x = var_15708_cast_fp16, y = var_15733_cast_fp16)[name = string("query_states_163_cast_fp16")]; + tensor var_15736_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_5)[name = string("op_15736_cast_fp16")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; + fp16 const_879_promoted_to_fp16 = const()[name = string("const_879_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15757_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_879_promoted_to_fp16)[name = string("op_15757_cast_fp16")]; + int32 var_15759 = const()[name = string("op_15759"), val = int32(-1)]; + bool var_15760_interleave_0 = const()[name = string("op_15760_interleave_0"), val = bool(false)]; + tensor var_15760_cast_fp16 = concat(axis = var_15759, interleave = var_15760_interleave_0, values = (var_15757_cast_fp16, x1_83_cast_fp16))[name = string("op_15760_cast_fp16")]; + tensor var_15761_cast_fp16 = mul(x = var_15760_cast_fp16, y = sin_5)[name = string("op_15761_cast_fp16")]; + tensor key_states_203_cast_fp16 = add(x = var_15736_cast_fp16, y = var_15761_cast_fp16)[name = string("key_states_203_cast_fp16")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([17])]; + tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; + tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; + tensor expand_dims_244 = const()[name = string("expand_dims_244"), val = tensor([18])]; + int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; + bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; + tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (expand_dims_240, expand_dims_241, current_pos, expand_dims_243))[name = string("concat_362")]; + tensor concat_363_values1_0 = const()[name = string("concat_363_values1_0"), val = tensor([0])]; + tensor concat_363_values3_0 = const()[name = string("concat_363_values3_0"), val = tensor([0])]; + int32 concat_363_axis_0 = const()[name = string("concat_363_axis_0"), val = int32(0)]; + bool concat_363_interleave_0 = const()[name = string("concat_363_interleave_0"), val = bool(false)]; + tensor concat_363 = concat(axis = concat_363_axis_0, interleave = concat_363_interleave_0, values = (expand_dims_244, concat_363_values1_0, end_pos_1, concat_363_values3_0))[name = string("concat_363")]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_362, begin_mask = model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0, end = concat_363, end_mask = model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_35_stride_0, update = key_states_203_cast_fp16, x = coreml_update_state_91)[name = string("model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_144_write_state")]; + tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_144")]; + tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([39])]; + tensor expand_dims_247 = const()[name = string("expand_dims_247"), val = tensor([0])]; + tensor expand_dims_249 = const()[name = string("expand_dims_249"), val = tensor([0])]; + tensor expand_dims_250 = const()[name = string("expand_dims_250"), val = tensor([40])]; + int32 concat_366_axis_0 = const()[name = string("concat_366_axis_0"), val = int32(0)]; + bool concat_366_interleave_0 = const()[name = string("concat_366_interleave_0"), val = bool(false)]; + tensor concat_366 = concat(axis = concat_366_axis_0, interleave = concat_366_interleave_0, values = (expand_dims_246, expand_dims_247, current_pos, expand_dims_249))[name = string("concat_366")]; + tensor concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = tensor([0])]; + tensor concat_367_values3_0 = const()[name = string("concat_367_values3_0"), val = tensor([0])]; + int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; + bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; + tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (expand_dims_250, concat_367_values1_0, end_pos_1, concat_367_values3_0))[name = string("concat_367")]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_163 = transpose(perm = var_15640, x = var_15635)[name = string("transpose_50")]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_366, begin_mask = model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0, end = concat_367, end_mask = model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_36_stride_0, update = value_states_163, x = coreml_update_state_92)[name = string("model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_145_write_state")]; + tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_145")]; + tensor var_15860_begin_0 = const()[name = string("op_15860_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_15860_end_0 = const()[name = string("op_15860_end_0"), val = tensor([18, 1, 512, 256])]; + tensor var_15860_end_mask_0 = const()[name = string("op_15860_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15860_cast_fp16 = slice_by_index(begin = var_15860_begin_0, end = var_15860_end_0, end_mask = var_15860_end_mask_0, x = coreml_update_state_93)[name = string("op_15860_cast_fp16")]; + tensor var_15867_begin_0 = const()[name = string("op_15867_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor var_15867_end_0 = const()[name = string("op_15867_end_0"), val = tensor([40, 1, 512, 256])]; + tensor var_15867_end_mask_0 = const()[name = string("op_15867_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15867_cast_fp16 = slice_by_index(begin = var_15867_begin_0, end = var_15867_end_0, end_mask = var_15867_end_mask_0, x = coreml_update_state_93)[name = string("op_15867_cast_fp16")]; + tensor var_15906 = const()[name = string("op_15906"), val = tensor([1, 4, 1, 1])]; + tensor x_325_cast_fp16 = tile(reps = var_15906, x = var_15860_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_15926 = const()[name = string("op_15926"), val = tensor([1, 4, 1, 1])]; + tensor x_331_cast_fp16 = tile(reps = var_15926, x = var_15867_cast_fp16)[name = string("x_331_cast_fp16")]; + bool var_15953_transpose_x_0 = const()[name = string("op_15953_transpose_x_0"), val = bool(false)]; + bool var_15953_transpose_y_0 = const()[name = string("op_15953_transpose_y_0"), val = bool(true)]; + tensor var_15953 = matmul(transpose_x = var_15953_transpose_x_0, transpose_y = var_15953_transpose_y_0, x = query_states_163_cast_fp16, y = x_325_cast_fp16)[name = string("op_15953")]; + fp16 var_15954_to_fp16 = const()[name = string("op_15954_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_81_cast_fp16 = mul(x = var_15953, y = var_15954_to_fp16)[name = string("attn_weights_81_cast_fp16")]; + tensor attn_weights_83_cast_fp16 = add(x = attn_weights_81_cast_fp16, y = mask_slice_1)[name = string("attn_weights_83_cast_fp16")]; + int32 var_15989 = const()[name = string("op_15989"), val = int32(-1)]; + tensor var_15991_cast_fp16 = softmax(axis = var_15989, x = attn_weights_83_cast_fp16)[name = string("op_15991_cast_fp16")]; + tensor concat_372 = const()[name = string("concat_372"), val = tensor([4, 64, 512])]; + tensor reshape_60_cast_fp16 = reshape(shape = concat_372, x = var_15991_cast_fp16)[name = string("reshape_60_cast_fp16")]; + tensor concat_373 = const()[name = string("concat_373"), val = tensor([4, 512, 256])]; + tensor reshape_61_cast_fp16 = reshape(shape = concat_373, x = x_331_cast_fp16)[name = string("reshape_61_cast_fp16")]; + bool matmul_20_transpose_x_0 = const()[name = string("matmul_20_transpose_x_0"), val = bool(false)]; + bool matmul_20_transpose_y_0 = const()[name = string("matmul_20_transpose_y_0"), val = bool(false)]; + tensor matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_0, transpose_y = matmul_20_transpose_y_0, x = reshape_60_cast_fp16, y = reshape_61_cast_fp16)[name = string("matmul_20_cast_fp16")]; + tensor concat_377 = const()[name = string("concat_377"), val = tensor([1, 4, 64, 256])]; + tensor reshape_62_cast_fp16 = reshape(shape = concat_377, x = matmul_20_cast_fp16)[name = string("reshape_62_cast_fp16")]; + tensor var_16003_perm_0 = const()[name = string("op_16003_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_16022 = const()[name = string("op_16022"), val = tensor([1, 64, 1024])]; + tensor var_16003_cast_fp16 = transpose(perm = var_16003_perm_0, x = reshape_62_cast_fp16)[name = string("transpose_49")]; + tensor attn_output_205_cast_fp16 = reshape(shape = var_16022, x = var_16003_cast_fp16)[name = string("attn_output_205_cast_fp16")]; + tensor var_16027 = const()[name = string("op_16027"), val = tensor([0, 2, 1])]; + string var_16043_pad_type_0 = const()[name = string("op_16043_pad_type_0"), val = string("valid")]; + int32 var_16043_groups_0 = const()[name = string("op_16043_groups_0"), val = int32(1)]; + tensor var_16043_strides_0 = const()[name = string("op_16043_strides_0"), val = tensor([1])]; + tensor var_16043_pad_0 = const()[name = string("op_16043_pad_0"), val = tensor([0, 0])]; + tensor var_16043_dilations_0 = const()[name = string("op_16043_dilations_0"), val = tensor([1])]; + tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774675584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775560384))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_16028_cast_fp16 = transpose(perm = var_16027, x = attn_output_205_cast_fp16)[name = string("transpose_48")]; + tensor var_16043_cast_fp16 = conv(dilations = var_16043_dilations_0, groups = var_16043_groups_0, pad = var_16043_pad_0, pad_type = var_16043_pad_type_0, strides = var_16043_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_16028_cast_fp16)[name = string("op_16043_cast_fp16")]; + tensor var_16047 = const()[name = string("op_16047"), val = tensor([0, 2, 1])]; + int32 var_16058 = const()[name = string("op_16058"), val = int32(-1)]; + fp16 const_891_promoted_to_fp16 = const()[name = string("const_891_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_333_cast_fp16 = transpose(perm = var_16047, x = var_16043_cast_fp16)[name = string("transpose_47")]; + tensor var_16060_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = const_891_promoted_to_fp16)[name = string("op_16060_cast_fp16")]; + bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; + tensor input_411_cast_fp16 = concat(axis = var_16058, interleave = input_411_interleave_0, values = (hidden_states_333_cast_fp16, var_16060_cast_fp16))[name = string("input_411_cast_fp16")]; + tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; + fp16 var_16055_to_fp16 = const()[name = string("op_16055_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_16055_to_fp16, x = input_411_cast_fp16)[name = string("normed_493_cast_fp16")]; + tensor normed_495_begin_0 = const()[name = string("normed_495_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_495_end_0 = const()[name = string("normed_495_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_495_end_mask_0 = const()[name = string("normed_495_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_495_cast_fp16 = slice_by_index(begin = normed_495_begin_0, end = normed_495_end_0, end_mask = normed_495_end_mask_0, x = normed_493_cast_fp16)[name = string("normed_495_cast_fp16")]; + tensor var_16074_to_fp16 = const()[name = string("op_16074_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775597312)))]; + tensor attn_output_209_cast_fp16 = mul(x = normed_495_cast_fp16, y = var_16074_to_fp16)[name = string("attn_output_209_cast_fp16")]; + tensor hidden_states_335_cast_fp16 = add(x = hidden_states_325_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_335_cast_fp16")]; + int32 var_16087 = const()[name = string("op_16087"), val = int32(-1)]; + fp16 const_895_promoted_to_fp16 = const()[name = string("const_895_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16089_cast_fp16 = mul(x = hidden_states_335_cast_fp16, y = const_895_promoted_to_fp16)[name = string("op_16089_cast_fp16")]; + bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; + tensor input_413_cast_fp16 = concat(axis = var_16087, interleave = input_413_interleave_0, values = (hidden_states_335_cast_fp16, var_16089_cast_fp16))[name = string("input_413_cast_fp16")]; + tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; + fp16 var_16084_to_fp16 = const()[name = string("op_16084_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_16084_to_fp16, x = input_413_cast_fp16)[name = string("normed_497_cast_fp16")]; + tensor normed_499_begin_0 = const()[name = string("normed_499_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_499_end_0 = const()[name = string("normed_499_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_499_end_mask_0 = const()[name = string("normed_499_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_499_cast_fp16 = slice_by_index(begin = normed_499_begin_0, end = normed_499_end_0, end_mask = normed_499_end_mask_0, x = normed_497_cast_fp16)[name = string("normed_499_cast_fp16")]; + tensor var_16103_to_fp16 = const()[name = string("op_16103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775599680)))]; + tensor x_333_cast_fp16 = mul(x = normed_499_cast_fp16, y = var_16103_to_fp16)[name = string("x_333_cast_fp16")]; + tensor var_16115 = const()[name = string("op_16115"), val = tensor([0, 2, 1])]; + tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; + tensor var_16116_cast_fp16 = transpose(perm = var_16115, x = x_333_cast_fp16)[name = string("transpose_46")]; + tensor input_415_cast_fp16 = expand_dims(axes = input_415_axes_0, x = var_16116_cast_fp16)[name = string("input_415_cast_fp16")]; + string x_335_pad_type_0 = const()[name = string("x_335_pad_type_0"), val = string("valid")]; + tensor x_335_strides_0 = const()[name = string("x_335_strides_0"), val = tensor([1, 1])]; + tensor x_335_pad_0 = const()[name = string("x_335_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_335_dilations_0 = const()[name = string("x_335_dilations_0"), val = tensor([1, 1])]; + int32 x_335_groups_0 = const()[name = string("x_335_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775602048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781574080))))[name = string("model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_335_cast_fp16 = conv(dilations = x_335_dilations_0, groups = x_335_groups_0, pad = x_335_pad_0, pad_type = x_335_pad_type_0, strides = x_335_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("x_335_cast_fp16")]; + string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; + tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; + tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; + int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781795328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787767360))))[name = string("model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_41_cast_fp16 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("b_41_cast_fp16")]; + string var_16141_mode_0 = const()[name = string("op_16141_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_16141_cast_fp16 = gelu(mode = var_16141_mode_0, x = x_335_cast_fp16)[name = string("op_16141_cast_fp16")]; + tensor input_417_cast_fp16 = mul(x = var_16141_cast_fp16, y = b_41_cast_fp16)[name = string("input_417_cast_fp16")]; + string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; + tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; + tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; + int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787988608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793960640))))[name = string("model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_41_cast_fp16 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_417_cast_fp16)[name = string("e_41_cast_fp16")]; + tensor var_16149_axes_0 = const()[name = string("op_16149_axes_0"), val = tensor([2])]; + tensor var_16149_cast_fp16 = squeeze(axes = var_16149_axes_0, x = e_41_cast_fp16)[name = string("op_16149_cast_fp16")]; + tensor var_16150 = const()[name = string("op_16150"), val = tensor([0, 2, 1])]; + int32 var_16161 = const()[name = string("op_16161"), val = int32(-1)]; + fp16 const_899_promoted_to_fp16 = const()[name = string("const_899_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_337_cast_fp16 = transpose(perm = var_16150, x = var_16149_cast_fp16)[name = string("transpose_45")]; + tensor var_16163_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = const_899_promoted_to_fp16)[name = string("op_16163_cast_fp16")]; + bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; + tensor input_419_cast_fp16 = concat(axis = var_16161, interleave = input_419_interleave_0, values = (hidden_states_337_cast_fp16, var_16163_cast_fp16))[name = string("input_419_cast_fp16")]; + tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; + fp16 var_16158_to_fp16 = const()[name = string("op_16158_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_16158_to_fp16, x = input_419_cast_fp16)[name = string("normed_501_cast_fp16")]; + tensor normed_503_begin_0 = const()[name = string("normed_503_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_503_end_0 = const()[name = string("normed_503_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_503_end_mask_0 = const()[name = string("normed_503_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_503_cast_fp16 = slice_by_index(begin = normed_503_begin_0, end = normed_503_end_0, end_mask = normed_503_end_mask_0, x = normed_501_cast_fp16)[name = string("normed_503_cast_fp16")]; + tensor var_16177_to_fp16 = const()[name = string("op_16177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793997568)))]; + tensor hidden_states_339_cast_fp16 = mul(x = normed_503_cast_fp16, y = var_16177_to_fp16)[name = string("hidden_states_339_cast_fp16")]; + tensor hidden_states_341_cast_fp16 = add(x = hidden_states_335_cast_fp16, y = hidden_states_339_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + int32 var_16231 = const()[name = string("op_16231"), val = int32(-1)]; + fp16 const_904_promoted_to_fp16 = const()[name = string("const_904_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16233_cast_fp16 = mul(x = hidden_states_341_cast_fp16, y = const_904_promoted_to_fp16)[name = string("op_16233_cast_fp16")]; + bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; + tensor input_421_cast_fp16 = concat(axis = var_16231, interleave = input_421_interleave_0, values = (hidden_states_341_cast_fp16, var_16233_cast_fp16))[name = string("input_421_cast_fp16")]; + tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; + fp16 var_16228_to_fp16 = const()[name = string("op_16228_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_16228_to_fp16, x = input_421_cast_fp16)[name = string("normed_505_cast_fp16")]; + tensor normed_507_begin_0 = const()[name = string("normed_507_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_507_end_0 = const()[name = string("normed_507_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_507_end_mask_0 = const()[name = string("normed_507_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_507_cast_fp16 = slice_by_index(begin = normed_507_begin_0, end = normed_507_end_0, end_mask = normed_507_end_mask_0, x = normed_505_cast_fp16)[name = string("normed_507_cast_fp16")]; + tensor var_16247_to_fp16 = const()[name = string("op_16247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793999936)))]; + tensor hidden_states_343_cast_fp16 = mul(x = normed_507_cast_fp16, y = var_16247_to_fp16)[name = string("hidden_states_343_cast_fp16")]; + tensor var_16258 = const()[name = string("op_16258"), val = tensor([0, 2, 1])]; + tensor var_16261_axes_0 = const()[name = string("op_16261_axes_0"), val = tensor([2])]; + tensor var_16259_cast_fp16 = transpose(perm = var_16258, x = hidden_states_343_cast_fp16)[name = string("transpose_44")]; + tensor var_16261_cast_fp16 = expand_dims(axes = var_16261_axes_0, x = var_16259_cast_fp16)[name = string("op_16261_cast_fp16")]; + string query_states_169_pad_type_0 = const()[name = string("query_states_169_pad_type_0"), val = string("valid")]; + tensor query_states_169_strides_0 = const()[name = string("query_states_169_strides_0"), val = tensor([1, 1])]; + tensor query_states_169_pad_0 = const()[name = string("query_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_169_dilations_0 = const()[name = string("query_states_169_dilations_0"), val = tensor([1, 1])]; + int32 query_states_169_groups_0 = const()[name = string("query_states_169_groups_0"), val = int32(1)]; + tensor query_states_169 = conv(dilations = query_states_169_dilations_0, groups = query_states_169_groups_0, pad = query_states_169_pad_0, pad_type = query_states_169_pad_type_0, strides = query_states_169_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_16261_cast_fp16)[name = string("query_states_169")]; + string key_states_211_pad_type_0 = const()[name = string("key_states_211_pad_type_0"), val = string("valid")]; + tensor key_states_211_strides_0 = const()[name = string("key_states_211_strides_0"), val = tensor([1, 1])]; + tensor key_states_211_pad_0 = const()[name = string("key_states_211_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_211_dilations_0 = const()[name = string("key_states_211_dilations_0"), val = tensor([1, 1])]; + int32 key_states_211_groups_0 = const()[name = string("key_states_211_groups_0"), val = int32(1)]; + tensor key_states_211 = conv(dilations = key_states_211_dilations_0, groups = key_states_211_groups_0, pad = key_states_211_pad_0, pad_type = key_states_211_pad_type_0, strides = key_states_211_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_16261_cast_fp16)[name = string("key_states_211")]; + string value_states_169_pad_type_0 = const()[name = string("value_states_169_pad_type_0"), val = string("valid")]; + tensor value_states_169_strides_0 = const()[name = string("value_states_169_strides_0"), val = tensor([1, 1])]; + tensor value_states_169_pad_0 = const()[name = string("value_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_169_dilations_0 = const()[name = string("value_states_169_dilations_0"), val = tensor([1, 1])]; + int32 value_states_169_groups_0 = const()[name = string("value_states_169_groups_0"), val = int32(1)]; + tensor value_states_169 = conv(dilations = value_states_169_dilations_0, groups = value_states_169_groups_0, pad = value_states_169_pad_0, pad_type = value_states_169_pad_type_0, strides = value_states_169_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_16261_cast_fp16)[name = string("value_states_169")]; + tensor var_16303 = const()[name = string("op_16303"), val = tensor([1, 4, 256, 64])]; + tensor var_16304 = reshape(shape = var_16303, x = query_states_169)[name = string("op_16304")]; + tensor var_16309 = const()[name = string("op_16309"), val = tensor([0, 1, 3, 2])]; + tensor var_16314 = const()[name = string("op_16314"), val = tensor([1, 1, 256, 64])]; + tensor var_16315 = reshape(shape = var_16314, x = key_states_211)[name = string("op_16315")]; + tensor var_16320 = const()[name = string("op_16320"), val = tensor([0, 1, 3, 2])]; + tensor var_16325 = const()[name = string("op_16325"), val = tensor([1, 1, 256, 64])]; + tensor var_16326 = reshape(shape = var_16325, x = value_states_169)[name = string("op_16326")]; + tensor var_16331 = const()[name = string("op_16331"), val = tensor([0, 1, 3, 2])]; + int32 var_16342 = const()[name = string("op_16342"), val = int32(-1)]; + fp16 const_909_promoted = const()[name = string("const_909_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_345 = transpose(perm = var_16309, x = var_16304)[name = string("transpose_43")]; + tensor var_16344 = mul(x = hidden_states_345, y = const_909_promoted)[name = string("op_16344")]; + bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; + tensor input_425 = concat(axis = var_16342, interleave = input_425_interleave_0, values = (hidden_states_345, var_16344))[name = string("input_425")]; + tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; + fp16 var_16339_to_fp16 = const()[name = string("op_16339_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_16339_to_fp16, x = input_425)[name = string("normed_509_cast_fp16")]; + tensor normed_511_begin_0 = const()[name = string("normed_511_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_511_end_0 = const()[name = string("normed_511_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_511_end_mask_0 = const()[name = string("normed_511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_511 = slice_by_index(begin = normed_511_begin_0, end = normed_511_end_0, end_mask = normed_511_end_mask_0, x = normed_509_cast_fp16)[name = string("normed_511")]; + tensor var_16358_to_fp16 = const()[name = string("op_16358_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794002304)))]; + tensor q_43_cast_fp16 = mul(x = normed_511, y = var_16358_to_fp16)[name = string("q_43_cast_fp16")]; + int32 var_16369 = const()[name = string("op_16369"), val = int32(-1)]; + fp16 const_913_promoted = const()[name = string("const_913_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_347 = transpose(perm = var_16320, x = var_16315)[name = string("transpose_42")]; + tensor var_16371 = mul(x = hidden_states_347, y = const_913_promoted)[name = string("op_16371")]; + bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; + tensor input_427 = concat(axis = var_16369, interleave = input_427_interleave_0, values = (hidden_states_347, var_16371))[name = string("input_427")]; + tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; + fp16 var_16366_to_fp16 = const()[name = string("op_16366_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_16366_to_fp16, x = input_427)[name = string("normed_513_cast_fp16")]; + tensor normed_515_begin_0 = const()[name = string("normed_515_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_515_end_0 = const()[name = string("normed_515_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_515_end_mask_0 = const()[name = string("normed_515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_515 = slice_by_index(begin = normed_515_begin_0, end = normed_515_end_0, end_mask = normed_515_end_mask_0, x = normed_513_cast_fp16)[name = string("normed_515")]; + tensor var_16385_to_fp16 = const()[name = string("op_16385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794002880)))]; + tensor k_43_cast_fp16 = mul(x = normed_515, y = var_16385_to_fp16)[name = string("k_43_cast_fp16")]; + tensor var_16399_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_5)[name = string("op_16399_cast_fp16")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; + fp16 const_919_promoted_to_fp16 = const()[name = string("const_919_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16420_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_919_promoted_to_fp16)[name = string("op_16420_cast_fp16")]; + int32 var_16422 = const()[name = string("op_16422"), val = int32(-1)]; + bool var_16423_interleave_0 = const()[name = string("op_16423_interleave_0"), val = bool(false)]; + tensor var_16423_cast_fp16 = concat(axis = var_16422, interleave = var_16423_interleave_0, values = (var_16420_cast_fp16, x1_85_cast_fp16))[name = string("op_16423_cast_fp16")]; + tensor var_16424_cast_fp16 = mul(x = var_16423_cast_fp16, y = sin_5)[name = string("op_16424_cast_fp16")]; + tensor query_states_171_cast_fp16 = add(x = var_16399_cast_fp16, y = var_16424_cast_fp16)[name = string("query_states_171_cast_fp16")]; + tensor var_16427_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_5)[name = string("op_16427_cast_fp16")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; + fp16 const_922_promoted_to_fp16 = const()[name = string("const_922_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16448_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_922_promoted_to_fp16)[name = string("op_16448_cast_fp16")]; + int32 var_16450 = const()[name = string("op_16450"), val = int32(-1)]; + bool var_16451_interleave_0 = const()[name = string("op_16451_interleave_0"), val = bool(false)]; + tensor var_16451_cast_fp16 = concat(axis = var_16450, interleave = var_16451_interleave_0, values = (var_16448_cast_fp16, x1_87_cast_fp16))[name = string("op_16451_cast_fp16")]; + tensor var_16452_cast_fp16 = mul(x = var_16451_cast_fp16, y = sin_5)[name = string("op_16452_cast_fp16")]; + tensor key_states_213_cast_fp16 = add(x = var_16427_cast_fp16, y = var_16452_cast_fp16)[name = string("key_states_213_cast_fp16")]; + tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([18])]; + tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; + tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; + tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([19])]; + int32 concat_380_axis_0 = const()[name = string("concat_380_axis_0"), val = int32(0)]; + bool concat_380_interleave_0 = const()[name = string("concat_380_interleave_0"), val = bool(false)]; + tensor concat_380 = concat(axis = concat_380_axis_0, interleave = concat_380_interleave_0, values = (expand_dims_252, expand_dims_253, current_pos, expand_dims_255))[name = string("concat_380")]; + tensor concat_381_values1_0 = const()[name = string("concat_381_values1_0"), val = tensor([0])]; + tensor concat_381_values3_0 = const()[name = string("concat_381_values3_0"), val = tensor([0])]; + int32 concat_381_axis_0 = const()[name = string("concat_381_axis_0"), val = int32(0)]; + bool concat_381_interleave_0 = const()[name = string("concat_381_interleave_0"), val = bool(false)]; + tensor concat_381 = concat(axis = concat_381_axis_0, interleave = concat_381_interleave_0, values = (expand_dims_256, concat_381_values1_0, end_pos_1, concat_381_values3_0))[name = string("concat_381")]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_380, begin_mask = model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0, end = concat_381, end_mask = model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_37_stride_0, update = key_states_213_cast_fp16, x = coreml_update_state_93)[name = string("model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_146_write_state")]; + tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_146")]; + tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([40])]; + tensor expand_dims_259 = const()[name = string("expand_dims_259"), val = tensor([0])]; + tensor expand_dims_261 = const()[name = string("expand_dims_261"), val = tensor([0])]; + tensor expand_dims_262 = const()[name = string("expand_dims_262"), val = tensor([41])]; + int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; + bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; + tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (expand_dims_258, expand_dims_259, current_pos, expand_dims_261))[name = string("concat_384")]; + tensor concat_385_values1_0 = const()[name = string("concat_385_values1_0"), val = tensor([0])]; + tensor concat_385_values3_0 = const()[name = string("concat_385_values3_0"), val = tensor([0])]; + int32 concat_385_axis_0 = const()[name = string("concat_385_axis_0"), val = int32(0)]; + bool concat_385_interleave_0 = const()[name = string("concat_385_interleave_0"), val = bool(false)]; + tensor concat_385 = concat(axis = concat_385_axis_0, interleave = concat_385_interleave_0, values = (expand_dims_262, concat_385_values1_0, end_pos_1, concat_385_values3_0))[name = string("concat_385")]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_171 = transpose(perm = var_16331, x = var_16326)[name = string("transpose_41")]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_384, begin_mask = model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0, end = concat_385, end_mask = model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_38_stride_0, update = value_states_171, x = coreml_update_state_94)[name = string("model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_147_write_state")]; + tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_147")]; + tensor var_16551_begin_0 = const()[name = string("op_16551_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_16551_end_0 = const()[name = string("op_16551_end_0"), val = tensor([19, 1, 512, 256])]; + tensor var_16551_end_mask_0 = const()[name = string("op_16551_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16551_cast_fp16 = slice_by_index(begin = var_16551_begin_0, end = var_16551_end_0, end_mask = var_16551_end_mask_0, x = coreml_update_state_95)[name = string("op_16551_cast_fp16")]; + tensor var_16558_begin_0 = const()[name = string("op_16558_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor var_16558_end_0 = const()[name = string("op_16558_end_0"), val = tensor([41, 1, 512, 256])]; + tensor var_16558_end_mask_0 = const()[name = string("op_16558_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16558_cast_fp16 = slice_by_index(begin = var_16558_begin_0, end = var_16558_end_0, end_mask = var_16558_end_mask_0, x = coreml_update_state_95)[name = string("op_16558_cast_fp16")]; + tensor var_16597 = const()[name = string("op_16597"), val = tensor([1, 4, 1, 1])]; + tensor x_341_cast_fp16 = tile(reps = var_16597, x = var_16551_cast_fp16)[name = string("x_341_cast_fp16")]; + tensor var_16617 = const()[name = string("op_16617"), val = tensor([1, 4, 1, 1])]; + tensor x_347_cast_fp16 = tile(reps = var_16617, x = var_16558_cast_fp16)[name = string("x_347_cast_fp16")]; + bool var_16644_transpose_x_0 = const()[name = string("op_16644_transpose_x_0"), val = bool(false)]; + bool var_16644_transpose_y_0 = const()[name = string("op_16644_transpose_y_0"), val = bool(true)]; + tensor var_16644 = matmul(transpose_x = var_16644_transpose_x_0, transpose_y = var_16644_transpose_y_0, x = query_states_171_cast_fp16, y = x_341_cast_fp16)[name = string("op_16644")]; + fp16 var_16645_to_fp16 = const()[name = string("op_16645_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_85_cast_fp16 = mul(x = var_16644, y = var_16645_to_fp16)[name = string("attn_weights_85_cast_fp16")]; + tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = mask_slice_1)[name = string("attn_weights_87_cast_fp16")]; + int32 var_16680 = const()[name = string("op_16680"), val = int32(-1)]; + tensor var_16682_cast_fp16 = softmax(axis = var_16680, x = attn_weights_87_cast_fp16)[name = string("op_16682_cast_fp16")]; + tensor concat_390 = const()[name = string("concat_390"), val = tensor([4, 64, 512])]; + tensor reshape_63_cast_fp16 = reshape(shape = concat_390, x = var_16682_cast_fp16)[name = string("reshape_63_cast_fp16")]; + tensor concat_391 = const()[name = string("concat_391"), val = tensor([4, 512, 256])]; + tensor reshape_64_cast_fp16 = reshape(shape = concat_391, x = x_347_cast_fp16)[name = string("reshape_64_cast_fp16")]; + bool matmul_21_transpose_x_0 = const()[name = string("matmul_21_transpose_x_0"), val = bool(false)]; + bool matmul_21_transpose_y_0 = const()[name = string("matmul_21_transpose_y_0"), val = bool(false)]; + tensor matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_0, transpose_y = matmul_21_transpose_y_0, x = reshape_63_cast_fp16, y = reshape_64_cast_fp16)[name = string("matmul_21_cast_fp16")]; + tensor concat_395 = const()[name = string("concat_395"), val = tensor([1, 4, 64, 256])]; + tensor reshape_65_cast_fp16 = reshape(shape = concat_395, x = matmul_21_cast_fp16)[name = string("reshape_65_cast_fp16")]; + tensor var_16694_perm_0 = const()[name = string("op_16694_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_16713 = const()[name = string("op_16713"), val = tensor([1, 64, 1024])]; + tensor var_16694_cast_fp16 = transpose(perm = var_16694_perm_0, x = reshape_65_cast_fp16)[name = string("transpose_40")]; + tensor attn_output_215_cast_fp16 = reshape(shape = var_16713, x = var_16694_cast_fp16)[name = string("attn_output_215_cast_fp16")]; + tensor var_16718 = const()[name = string("op_16718"), val = tensor([0, 2, 1])]; + string var_16734_pad_type_0 = const()[name = string("op_16734_pad_type_0"), val = string("valid")]; + int32 var_16734_groups_0 = const()[name = string("op_16734_groups_0"), val = int32(1)]; + tensor var_16734_strides_0 = const()[name = string("op_16734_strides_0"), val = tensor([1])]; + tensor var_16734_pad_0 = const()[name = string("op_16734_pad_0"), val = tensor([0, 0])]; + tensor var_16734_dilations_0 = const()[name = string("op_16734_dilations_0"), val = tensor([1])]; + tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794003456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794888256))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_16719_cast_fp16 = transpose(perm = var_16718, x = attn_output_215_cast_fp16)[name = string("transpose_39")]; + tensor var_16734_cast_fp16 = conv(dilations = var_16734_dilations_0, groups = var_16734_groups_0, pad = var_16734_pad_0, pad_type = var_16734_pad_type_0, strides = var_16734_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_16719_cast_fp16)[name = string("op_16734_cast_fp16")]; + tensor var_16738 = const()[name = string("op_16738"), val = tensor([0, 2, 1])]; + int32 var_16749 = const()[name = string("op_16749"), val = int32(-1)]; + fp16 const_934_promoted_to_fp16 = const()[name = string("const_934_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_349_cast_fp16 = transpose(perm = var_16738, x = var_16734_cast_fp16)[name = string("transpose_38")]; + tensor var_16751_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = const_934_promoted_to_fp16)[name = string("op_16751_cast_fp16")]; + bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; + tensor input_431_cast_fp16 = concat(axis = var_16749, interleave = input_431_interleave_0, values = (hidden_states_349_cast_fp16, var_16751_cast_fp16))[name = string("input_431_cast_fp16")]; + tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; + fp16 var_16746_to_fp16 = const()[name = string("op_16746_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_16746_to_fp16, x = input_431_cast_fp16)[name = string("normed_517_cast_fp16")]; + tensor normed_519_begin_0 = const()[name = string("normed_519_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_519_end_0 = const()[name = string("normed_519_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_519_end_mask_0 = const()[name = string("normed_519_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_519_cast_fp16 = slice_by_index(begin = normed_519_begin_0, end = normed_519_end_0, end_mask = normed_519_end_mask_0, x = normed_517_cast_fp16)[name = string("normed_519_cast_fp16")]; + tensor var_16765_to_fp16 = const()[name = string("op_16765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794925184)))]; + tensor attn_output_219_cast_fp16 = mul(x = normed_519_cast_fp16, y = var_16765_to_fp16)[name = string("attn_output_219_cast_fp16")]; + tensor hidden_states_351_cast_fp16 = add(x = hidden_states_341_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_351_cast_fp16")]; + int32 var_16778 = const()[name = string("op_16778"), val = int32(-1)]; + fp16 const_938_promoted_to_fp16 = const()[name = string("const_938_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16780_cast_fp16 = mul(x = hidden_states_351_cast_fp16, y = const_938_promoted_to_fp16)[name = string("op_16780_cast_fp16")]; + bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; + tensor input_433_cast_fp16 = concat(axis = var_16778, interleave = input_433_interleave_0, values = (hidden_states_351_cast_fp16, var_16780_cast_fp16))[name = string("input_433_cast_fp16")]; + tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; + fp16 var_16775_to_fp16 = const()[name = string("op_16775_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_16775_to_fp16, x = input_433_cast_fp16)[name = string("normed_521_cast_fp16")]; + tensor normed_523_begin_0 = const()[name = string("normed_523_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_523_end_0 = const()[name = string("normed_523_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_523_end_mask_0 = const()[name = string("normed_523_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_523_cast_fp16 = slice_by_index(begin = normed_523_begin_0, end = normed_523_end_0, end_mask = normed_523_end_mask_0, x = normed_521_cast_fp16)[name = string("normed_523_cast_fp16")]; + tensor var_16794_to_fp16 = const()[name = string("op_16794_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794927552)))]; + tensor x_349_cast_fp16 = mul(x = normed_523_cast_fp16, y = var_16794_to_fp16)[name = string("x_349_cast_fp16")]; + tensor var_16806 = const()[name = string("op_16806"), val = tensor([0, 2, 1])]; + tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; + tensor var_16807_cast_fp16 = transpose(perm = var_16806, x = x_349_cast_fp16)[name = string("transpose_37")]; + tensor input_435_cast_fp16 = expand_dims(axes = input_435_axes_0, x = var_16807_cast_fp16)[name = string("input_435_cast_fp16")]; + string x_351_pad_type_0 = const()[name = string("x_351_pad_type_0"), val = string("valid")]; + tensor x_351_strides_0 = const()[name = string("x_351_strides_0"), val = tensor([1, 1])]; + tensor x_351_pad_0 = const()[name = string("x_351_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_351_dilations_0 = const()[name = string("x_351_dilations_0"), val = tensor([1, 1])]; + int32 x_351_groups_0 = const()[name = string("x_351_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794929920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800901952))))[name = string("model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("x_351_cast_fp16")]; + string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; + tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; + tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; + int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(801123200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807095232))))[name = string("model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_43_cast_fp16 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("b_43_cast_fp16")]; + string var_16832_mode_0 = const()[name = string("op_16832_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_16832_cast_fp16 = gelu(mode = var_16832_mode_0, x = x_351_cast_fp16)[name = string("op_16832_cast_fp16")]; + tensor input_437_cast_fp16 = mul(x = var_16832_cast_fp16, y = b_43_cast_fp16)[name = string("input_437_cast_fp16")]; + string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; + tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; + tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; + int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807316480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813288512))))[name = string("model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_43_cast_fp16 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_437_cast_fp16)[name = string("e_43_cast_fp16")]; + tensor var_16840_axes_0 = const()[name = string("op_16840_axes_0"), val = tensor([2])]; + tensor var_16840_cast_fp16 = squeeze(axes = var_16840_axes_0, x = e_43_cast_fp16)[name = string("op_16840_cast_fp16")]; + tensor var_16841 = const()[name = string("op_16841"), val = tensor([0, 2, 1])]; + int32 var_16852 = const()[name = string("op_16852"), val = int32(-1)]; + fp16 const_942_promoted_to_fp16 = const()[name = string("const_942_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_353_cast_fp16 = transpose(perm = var_16841, x = var_16840_cast_fp16)[name = string("transpose_36")]; + tensor var_16854_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = const_942_promoted_to_fp16)[name = string("op_16854_cast_fp16")]; + bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; + tensor input_439_cast_fp16 = concat(axis = var_16852, interleave = input_439_interleave_0, values = (hidden_states_353_cast_fp16, var_16854_cast_fp16))[name = string("input_439_cast_fp16")]; + tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; + fp16 var_16849_to_fp16 = const()[name = string("op_16849_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_16849_to_fp16, x = input_439_cast_fp16)[name = string("normed_525_cast_fp16")]; + tensor normed_527_begin_0 = const()[name = string("normed_527_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_527_end_0 = const()[name = string("normed_527_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_527_end_mask_0 = const()[name = string("normed_527_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_527_cast_fp16 = slice_by_index(begin = normed_527_begin_0, end = normed_527_end_0, end_mask = normed_527_end_mask_0, x = normed_525_cast_fp16)[name = string("normed_527_cast_fp16")]; + tensor var_16868_to_fp16 = const()[name = string("op_16868_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813325440)))]; + tensor hidden_states_355_cast_fp16 = mul(x = normed_527_cast_fp16, y = var_16868_to_fp16)[name = string("hidden_states_355_cast_fp16")]; + tensor hidden_states_357_cast_fp16 = add(x = hidden_states_351_cast_fp16, y = hidden_states_355_cast_fp16)[name = string("hidden_states_357_cast_fp16")]; + int32 var_16922 = const()[name = string("op_16922"), val = int32(-1)]; + fp16 const_947_promoted_to_fp16 = const()[name = string("const_947_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16924_cast_fp16 = mul(x = hidden_states_357_cast_fp16, y = const_947_promoted_to_fp16)[name = string("op_16924_cast_fp16")]; + bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; + tensor input_441_cast_fp16 = concat(axis = var_16922, interleave = input_441_interleave_0, values = (hidden_states_357_cast_fp16, var_16924_cast_fp16))[name = string("input_441_cast_fp16")]; + tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; + fp16 var_16919_to_fp16 = const()[name = string("op_16919_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_16919_to_fp16, x = input_441_cast_fp16)[name = string("normed_529_cast_fp16")]; + tensor normed_531_begin_0 = const()[name = string("normed_531_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_531_end_0 = const()[name = string("normed_531_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_531_end_mask_0 = const()[name = string("normed_531_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_531_cast_fp16 = slice_by_index(begin = normed_531_begin_0, end = normed_531_end_0, end_mask = normed_531_end_mask_0, x = normed_529_cast_fp16)[name = string("normed_531_cast_fp16")]; + tensor var_16938_to_fp16 = const()[name = string("op_16938_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813327808)))]; + tensor hidden_states_359_cast_fp16 = mul(x = normed_531_cast_fp16, y = var_16938_to_fp16)[name = string("hidden_states_359_cast_fp16")]; + tensor var_16949 = const()[name = string("op_16949"), val = tensor([0, 2, 1])]; + tensor var_16952_axes_0 = const()[name = string("op_16952_axes_0"), val = tensor([2])]; + tensor var_16950_cast_fp16 = transpose(perm = var_16949, x = hidden_states_359_cast_fp16)[name = string("transpose_35")]; + tensor var_16952_cast_fp16 = expand_dims(axes = var_16952_axes_0, x = var_16950_cast_fp16)[name = string("op_16952_cast_fp16")]; + string query_states_177_pad_type_0 = const()[name = string("query_states_177_pad_type_0"), val = string("valid")]; + tensor query_states_177_strides_0 = const()[name = string("query_states_177_strides_0"), val = tensor([1, 1])]; + tensor query_states_177_pad_0 = const()[name = string("query_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_177_dilations_0 = const()[name = string("query_states_177_dilations_0"), val = tensor([1, 1])]; + int32 query_states_177_groups_0 = const()[name = string("query_states_177_groups_0"), val = int32(1)]; + tensor query_states_177 = conv(dilations = query_states_177_dilations_0, groups = query_states_177_groups_0, pad = query_states_177_pad_0, pad_type = query_states_177_pad_type_0, strides = query_states_177_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_16952_cast_fp16)[name = string("query_states_177")]; + string key_states_221_pad_type_0 = const()[name = string("key_states_221_pad_type_0"), val = string("valid")]; + tensor key_states_221_strides_0 = const()[name = string("key_states_221_strides_0"), val = tensor([1, 1])]; + tensor key_states_221_pad_0 = const()[name = string("key_states_221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_221_dilations_0 = const()[name = string("key_states_221_dilations_0"), val = tensor([1, 1])]; + int32 key_states_221_groups_0 = const()[name = string("key_states_221_groups_0"), val = int32(1)]; + tensor key_states_221 = conv(dilations = key_states_221_dilations_0, groups = key_states_221_groups_0, pad = key_states_221_pad_0, pad_type = key_states_221_pad_type_0, strides = key_states_221_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_16952_cast_fp16)[name = string("key_states_221")]; + string value_states_177_pad_type_0 = const()[name = string("value_states_177_pad_type_0"), val = string("valid")]; + tensor value_states_177_strides_0 = const()[name = string("value_states_177_strides_0"), val = tensor([1, 1])]; + tensor value_states_177_pad_0 = const()[name = string("value_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_177_dilations_0 = const()[name = string("value_states_177_dilations_0"), val = tensor([1, 1])]; + int32 value_states_177_groups_0 = const()[name = string("value_states_177_groups_0"), val = int32(1)]; + tensor value_states_177 = conv(dilations = value_states_177_dilations_0, groups = value_states_177_groups_0, pad = value_states_177_pad_0, pad_type = value_states_177_pad_type_0, strides = value_states_177_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_16952_cast_fp16)[name = string("value_states_177")]; + tensor var_16994 = const()[name = string("op_16994"), val = tensor([1, 4, 256, 64])]; + tensor var_16995 = reshape(shape = var_16994, x = query_states_177)[name = string("op_16995")]; + tensor var_17000 = const()[name = string("op_17000"), val = tensor([0, 1, 3, 2])]; + tensor var_17005 = const()[name = string("op_17005"), val = tensor([1, 1, 256, 64])]; + tensor var_17006 = reshape(shape = var_17005, x = key_states_221)[name = string("op_17006")]; + tensor var_17011 = const()[name = string("op_17011"), val = tensor([0, 1, 3, 2])]; + tensor var_17016 = const()[name = string("op_17016"), val = tensor([1, 1, 256, 64])]; + tensor var_17017 = reshape(shape = var_17016, x = value_states_177)[name = string("op_17017")]; + tensor var_17022 = const()[name = string("op_17022"), val = tensor([0, 1, 3, 2])]; + int32 var_17033 = const()[name = string("op_17033"), val = int32(-1)]; + fp16 const_952_promoted = const()[name = string("const_952_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_361 = transpose(perm = var_17000, x = var_16995)[name = string("transpose_34")]; + tensor var_17035 = mul(x = hidden_states_361, y = const_952_promoted)[name = string("op_17035")]; + bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; + tensor input_445 = concat(axis = var_17033, interleave = input_445_interleave_0, values = (hidden_states_361, var_17035))[name = string("input_445")]; + tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; + fp16 var_17030_to_fp16 = const()[name = string("op_17030_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_17030_to_fp16, x = input_445)[name = string("normed_533_cast_fp16")]; + tensor normed_535_begin_0 = const()[name = string("normed_535_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_535_end_0 = const()[name = string("normed_535_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_535_end_mask_0 = const()[name = string("normed_535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_535 = slice_by_index(begin = normed_535_begin_0, end = normed_535_end_0, end_mask = normed_535_end_mask_0, x = normed_533_cast_fp16)[name = string("normed_535")]; + tensor var_17049_to_fp16 = const()[name = string("op_17049_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813330176)))]; + tensor q_45_cast_fp16 = mul(x = normed_535, y = var_17049_to_fp16)[name = string("q_45_cast_fp16")]; + int32 var_17060 = const()[name = string("op_17060"), val = int32(-1)]; + fp16 const_956_promoted = const()[name = string("const_956_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_363 = transpose(perm = var_17011, x = var_17006)[name = string("transpose_33")]; + tensor var_17062 = mul(x = hidden_states_363, y = const_956_promoted)[name = string("op_17062")]; + bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; + tensor input_447 = concat(axis = var_17060, interleave = input_447_interleave_0, values = (hidden_states_363, var_17062))[name = string("input_447")]; + tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; + fp16 var_17057_to_fp16 = const()[name = string("op_17057_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_17057_to_fp16, x = input_447)[name = string("normed_537_cast_fp16")]; + tensor normed_539_begin_0 = const()[name = string("normed_539_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_539_end_0 = const()[name = string("normed_539_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_539_end_mask_0 = const()[name = string("normed_539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_539 = slice_by_index(begin = normed_539_begin_0, end = normed_539_end_0, end_mask = normed_539_end_mask_0, x = normed_537_cast_fp16)[name = string("normed_539")]; + tensor var_17076_to_fp16 = const()[name = string("op_17076_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813330752)))]; + tensor k_45_cast_fp16 = mul(x = normed_539, y = var_17076_to_fp16)[name = string("k_45_cast_fp16")]; + tensor var_17090_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_5)[name = string("op_17090_cast_fp16")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; + fp16 const_962_promoted_to_fp16 = const()[name = string("const_962_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17111_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_962_promoted_to_fp16)[name = string("op_17111_cast_fp16")]; + int32 var_17113 = const()[name = string("op_17113"), val = int32(-1)]; + bool var_17114_interleave_0 = const()[name = string("op_17114_interleave_0"), val = bool(false)]; + tensor var_17114_cast_fp16 = concat(axis = var_17113, interleave = var_17114_interleave_0, values = (var_17111_cast_fp16, x1_89_cast_fp16))[name = string("op_17114_cast_fp16")]; + tensor var_17115_cast_fp16 = mul(x = var_17114_cast_fp16, y = sin_5)[name = string("op_17115_cast_fp16")]; + tensor query_states_179_cast_fp16 = add(x = var_17090_cast_fp16, y = var_17115_cast_fp16)[name = string("query_states_179_cast_fp16")]; + tensor var_17118_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_5)[name = string("op_17118_cast_fp16")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; + fp16 const_965_promoted_to_fp16 = const()[name = string("const_965_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17139_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_965_promoted_to_fp16)[name = string("op_17139_cast_fp16")]; + int32 var_17141 = const()[name = string("op_17141"), val = int32(-1)]; + bool var_17142_interleave_0 = const()[name = string("op_17142_interleave_0"), val = bool(false)]; + tensor var_17142_cast_fp16 = concat(axis = var_17141, interleave = var_17142_interleave_0, values = (var_17139_cast_fp16, x1_91_cast_fp16))[name = string("op_17142_cast_fp16")]; + tensor var_17143_cast_fp16 = mul(x = var_17142_cast_fp16, y = sin_5)[name = string("op_17143_cast_fp16")]; + tensor key_states_223_cast_fp16 = add(x = var_17118_cast_fp16, y = var_17143_cast_fp16)[name = string("key_states_223_cast_fp16")]; + tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([19])]; + tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; + tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; + tensor expand_dims_268 = const()[name = string("expand_dims_268"), val = tensor([20])]; + int32 concat_398_axis_0 = const()[name = string("concat_398_axis_0"), val = int32(0)]; + bool concat_398_interleave_0 = const()[name = string("concat_398_interleave_0"), val = bool(false)]; + tensor concat_398 = concat(axis = concat_398_axis_0, interleave = concat_398_interleave_0, values = (expand_dims_264, expand_dims_265, current_pos, expand_dims_267))[name = string("concat_398")]; + tensor concat_399_values1_0 = const()[name = string("concat_399_values1_0"), val = tensor([0])]; + tensor concat_399_values3_0 = const()[name = string("concat_399_values3_0"), val = tensor([0])]; + int32 concat_399_axis_0 = const()[name = string("concat_399_axis_0"), val = int32(0)]; + bool concat_399_interleave_0 = const()[name = string("concat_399_interleave_0"), val = bool(false)]; + tensor concat_399 = concat(axis = concat_399_axis_0, interleave = concat_399_interleave_0, values = (expand_dims_268, concat_399_values1_0, end_pos_1, concat_399_values3_0))[name = string("concat_399")]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_398, begin_mask = model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0, end = concat_399, end_mask = model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_39_stride_0, update = key_states_223_cast_fp16, x = coreml_update_state_95)[name = string("model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_148_write_state")]; + tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_148")]; + tensor expand_dims_270 = const()[name = string("expand_dims_270"), val = tensor([41])]; + tensor expand_dims_271 = const()[name = string("expand_dims_271"), val = tensor([0])]; + tensor expand_dims_273 = const()[name = string("expand_dims_273"), val = tensor([0])]; + tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([42])]; + int32 concat_402_axis_0 = const()[name = string("concat_402_axis_0"), val = int32(0)]; + bool concat_402_interleave_0 = const()[name = string("concat_402_interleave_0"), val = bool(false)]; + tensor concat_402 = concat(axis = concat_402_axis_0, interleave = concat_402_interleave_0, values = (expand_dims_270, expand_dims_271, current_pos, expand_dims_273))[name = string("concat_402")]; + tensor concat_403_values1_0 = const()[name = string("concat_403_values1_0"), val = tensor([0])]; + tensor concat_403_values3_0 = const()[name = string("concat_403_values3_0"), val = tensor([0])]; + int32 concat_403_axis_0 = const()[name = string("concat_403_axis_0"), val = int32(0)]; + bool concat_403_interleave_0 = const()[name = string("concat_403_interleave_0"), val = bool(false)]; + tensor concat_403 = concat(axis = concat_403_axis_0, interleave = concat_403_interleave_0, values = (expand_dims_274, concat_403_values1_0, end_pos_1, concat_403_values3_0))[name = string("concat_403")]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_179 = transpose(perm = var_17022, x = var_17017)[name = string("transpose_32")]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_402, begin_mask = model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0, end = concat_403, end_mask = model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_40_stride_0, update = value_states_179, x = coreml_update_state_96)[name = string("model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_149_write_state")]; + tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_149")]; + tensor var_17242_begin_0 = const()[name = string("op_17242_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_17242_end_0 = const()[name = string("op_17242_end_0"), val = tensor([20, 1, 512, 256])]; + tensor var_17242_end_mask_0 = const()[name = string("op_17242_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_17242_cast_fp16 = slice_by_index(begin = var_17242_begin_0, end = var_17242_end_0, end_mask = var_17242_end_mask_0, x = coreml_update_state_97)[name = string("op_17242_cast_fp16")]; + tensor var_17249_begin_0 = const()[name = string("op_17249_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor var_17249_end_0 = const()[name = string("op_17249_end_0"), val = tensor([42, 1, 512, 256])]; + tensor var_17249_end_mask_0 = const()[name = string("op_17249_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_17249_cast_fp16 = slice_by_index(begin = var_17249_begin_0, end = var_17249_end_0, end_mask = var_17249_end_mask_0, x = coreml_update_state_97)[name = string("op_17249_cast_fp16")]; + tensor var_17288 = const()[name = string("op_17288"), val = tensor([1, 4, 1, 1])]; + tensor x_357_cast_fp16 = tile(reps = var_17288, x = var_17242_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_17308 = const()[name = string("op_17308"), val = tensor([1, 4, 1, 1])]; + tensor x_363_cast_fp16 = tile(reps = var_17308, x = var_17249_cast_fp16)[name = string("x_363_cast_fp16")]; + bool var_17335_transpose_x_0 = const()[name = string("op_17335_transpose_x_0"), val = bool(false)]; + bool var_17335_transpose_y_0 = const()[name = string("op_17335_transpose_y_0"), val = bool(true)]; + tensor var_17335 = matmul(transpose_x = var_17335_transpose_x_0, transpose_y = var_17335_transpose_y_0, x = query_states_179_cast_fp16, y = x_357_cast_fp16)[name = string("op_17335")]; + fp16 var_17336_to_fp16 = const()[name = string("op_17336_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_89_cast_fp16 = mul(x = var_17335, y = var_17336_to_fp16)[name = string("attn_weights_89_cast_fp16")]; + tensor attn_weights_91_cast_fp16 = add(x = attn_weights_89_cast_fp16, y = mask_slice_1)[name = string("attn_weights_91_cast_fp16")]; + int32 var_17371 = const()[name = string("op_17371"), val = int32(-1)]; + tensor var_17373_cast_fp16 = softmax(axis = var_17371, x = attn_weights_91_cast_fp16)[name = string("op_17373_cast_fp16")]; + tensor concat_408 = const()[name = string("concat_408"), val = tensor([4, 64, 512])]; + tensor reshape_66_cast_fp16 = reshape(shape = concat_408, x = var_17373_cast_fp16)[name = string("reshape_66_cast_fp16")]; + tensor concat_409 = const()[name = string("concat_409"), val = tensor([4, 512, 256])]; + tensor reshape_67_cast_fp16 = reshape(shape = concat_409, x = x_363_cast_fp16)[name = string("reshape_67_cast_fp16")]; + bool matmul_22_transpose_x_0 = const()[name = string("matmul_22_transpose_x_0"), val = bool(false)]; + bool matmul_22_transpose_y_0 = const()[name = string("matmul_22_transpose_y_0"), val = bool(false)]; + tensor matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_0, transpose_y = matmul_22_transpose_y_0, x = reshape_66_cast_fp16, y = reshape_67_cast_fp16)[name = string("matmul_22_cast_fp16")]; + tensor concat_413 = const()[name = string("concat_413"), val = tensor([1, 4, 64, 256])]; + tensor reshape_68_cast_fp16 = reshape(shape = concat_413, x = matmul_22_cast_fp16)[name = string("reshape_68_cast_fp16")]; + tensor var_17385_perm_0 = const()[name = string("op_17385_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_17404 = const()[name = string("op_17404"), val = tensor([1, 64, 1024])]; + tensor var_17385_cast_fp16 = transpose(perm = var_17385_perm_0, x = reshape_68_cast_fp16)[name = string("transpose_31")]; + tensor attn_output_225_cast_fp16 = reshape(shape = var_17404, x = var_17385_cast_fp16)[name = string("attn_output_225_cast_fp16")]; + tensor var_17409 = const()[name = string("op_17409"), val = tensor([0, 2, 1])]; + string var_17425_pad_type_0 = const()[name = string("op_17425_pad_type_0"), val = string("valid")]; + int32 var_17425_groups_0 = const()[name = string("op_17425_groups_0"), val = int32(1)]; + tensor var_17425_strides_0 = const()[name = string("op_17425_strides_0"), val = tensor([1])]; + tensor var_17425_pad_0 = const()[name = string("op_17425_pad_0"), val = tensor([0, 0])]; + tensor var_17425_dilations_0 = const()[name = string("op_17425_dilations_0"), val = tensor([1])]; + tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813331328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814216128))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_17410_cast_fp16 = transpose(perm = var_17409, x = attn_output_225_cast_fp16)[name = string("transpose_30")]; + tensor var_17425_cast_fp16 = conv(dilations = var_17425_dilations_0, groups = var_17425_groups_0, pad = var_17425_pad_0, pad_type = var_17425_pad_type_0, strides = var_17425_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_17410_cast_fp16)[name = string("op_17425_cast_fp16")]; + tensor var_17429 = const()[name = string("op_17429"), val = tensor([0, 2, 1])]; + int32 var_17440 = const()[name = string("op_17440"), val = int32(-1)]; + fp16 const_977_promoted_to_fp16 = const()[name = string("const_977_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_365_cast_fp16 = transpose(perm = var_17429, x = var_17425_cast_fp16)[name = string("transpose_29")]; + tensor var_17442_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = const_977_promoted_to_fp16)[name = string("op_17442_cast_fp16")]; + bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; + tensor input_451_cast_fp16 = concat(axis = var_17440, interleave = input_451_interleave_0, values = (hidden_states_365_cast_fp16, var_17442_cast_fp16))[name = string("input_451_cast_fp16")]; + tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; + fp16 var_17437_to_fp16 = const()[name = string("op_17437_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_17437_to_fp16, x = input_451_cast_fp16)[name = string("normed_541_cast_fp16")]; + tensor normed_543_begin_0 = const()[name = string("normed_543_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_543_end_0 = const()[name = string("normed_543_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_543_end_mask_0 = const()[name = string("normed_543_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_543_cast_fp16 = slice_by_index(begin = normed_543_begin_0, end = normed_543_end_0, end_mask = normed_543_end_mask_0, x = normed_541_cast_fp16)[name = string("normed_543_cast_fp16")]; + tensor var_17456_to_fp16 = const()[name = string("op_17456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814253056)))]; + tensor attn_output_229_cast_fp16 = mul(x = normed_543_cast_fp16, y = var_17456_to_fp16)[name = string("attn_output_229_cast_fp16")]; + tensor hidden_states_367_cast_fp16 = add(x = hidden_states_357_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + int32 var_17469 = const()[name = string("op_17469"), val = int32(-1)]; + fp16 const_981_promoted_to_fp16 = const()[name = string("const_981_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17471_cast_fp16 = mul(x = hidden_states_367_cast_fp16, y = const_981_promoted_to_fp16)[name = string("op_17471_cast_fp16")]; + bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; + tensor input_453_cast_fp16 = concat(axis = var_17469, interleave = input_453_interleave_0, values = (hidden_states_367_cast_fp16, var_17471_cast_fp16))[name = string("input_453_cast_fp16")]; + tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; + fp16 var_17466_to_fp16 = const()[name = string("op_17466_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_17466_to_fp16, x = input_453_cast_fp16)[name = string("normed_545_cast_fp16")]; + tensor normed_547_begin_0 = const()[name = string("normed_547_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_547_end_0 = const()[name = string("normed_547_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_547_end_mask_0 = const()[name = string("normed_547_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_547_cast_fp16 = slice_by_index(begin = normed_547_begin_0, end = normed_547_end_0, end_mask = normed_547_end_mask_0, x = normed_545_cast_fp16)[name = string("normed_547_cast_fp16")]; + tensor var_17485_to_fp16 = const()[name = string("op_17485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814255424)))]; + tensor x_365_cast_fp16 = mul(x = normed_547_cast_fp16, y = var_17485_to_fp16)[name = string("x_365_cast_fp16")]; + tensor var_17497 = const()[name = string("op_17497"), val = tensor([0, 2, 1])]; + tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; + tensor var_17498_cast_fp16 = transpose(perm = var_17497, x = x_365_cast_fp16)[name = string("transpose_28")]; + tensor input_455_cast_fp16 = expand_dims(axes = input_455_axes_0, x = var_17498_cast_fp16)[name = string("input_455_cast_fp16")]; + string x_367_pad_type_0 = const()[name = string("x_367_pad_type_0"), val = string("valid")]; + tensor x_367_strides_0 = const()[name = string("x_367_strides_0"), val = tensor([1, 1])]; + tensor x_367_pad_0 = const()[name = string("x_367_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_367_dilations_0 = const()[name = string("x_367_dilations_0"), val = tensor([1, 1])]; + int32 x_367_groups_0 = const()[name = string("x_367_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814257792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820229824))))[name = string("model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("x_367_cast_fp16")]; + string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; + tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; + tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; + int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820451072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826423104))))[name = string("model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_45_cast_fp16 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("b_45_cast_fp16")]; + string var_17523_mode_0 = const()[name = string("op_17523_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_17523_cast_fp16 = gelu(mode = var_17523_mode_0, x = x_367_cast_fp16)[name = string("op_17523_cast_fp16")]; + tensor input_457_cast_fp16 = mul(x = var_17523_cast_fp16, y = b_45_cast_fp16)[name = string("input_457_cast_fp16")]; + string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; + tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; + tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; + int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826644352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832616384))))[name = string("model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_45_cast_fp16 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_457_cast_fp16)[name = string("e_45_cast_fp16")]; + tensor var_17531_axes_0 = const()[name = string("op_17531_axes_0"), val = tensor([2])]; + tensor var_17531_cast_fp16 = squeeze(axes = var_17531_axes_0, x = e_45_cast_fp16)[name = string("op_17531_cast_fp16")]; + tensor var_17532 = const()[name = string("op_17532"), val = tensor([0, 2, 1])]; + int32 var_17543 = const()[name = string("op_17543"), val = int32(-1)]; + fp16 const_985_promoted_to_fp16 = const()[name = string("const_985_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_369_cast_fp16 = transpose(perm = var_17532, x = var_17531_cast_fp16)[name = string("transpose_27")]; + tensor var_17545_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = const_985_promoted_to_fp16)[name = string("op_17545_cast_fp16")]; + bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; + tensor input_459_cast_fp16 = concat(axis = var_17543, interleave = input_459_interleave_0, values = (hidden_states_369_cast_fp16, var_17545_cast_fp16))[name = string("input_459_cast_fp16")]; + tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; + fp16 var_17540_to_fp16 = const()[name = string("op_17540_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_17540_to_fp16, x = input_459_cast_fp16)[name = string("normed_549_cast_fp16")]; + tensor normed_551_begin_0 = const()[name = string("normed_551_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_551_end_0 = const()[name = string("normed_551_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_551_end_mask_0 = const()[name = string("normed_551_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_551_cast_fp16 = slice_by_index(begin = normed_551_begin_0, end = normed_551_end_0, end_mask = normed_551_end_mask_0, x = normed_549_cast_fp16)[name = string("normed_551_cast_fp16")]; + tensor var_17559_to_fp16 = const()[name = string("op_17559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832653312)))]; + tensor hidden_states_371_cast_fp16 = mul(x = normed_551_cast_fp16, y = var_17559_to_fp16)[name = string("hidden_states_371_cast_fp16")]; + tensor hidden_states_373_cast_fp16 = add(x = hidden_states_367_cast_fp16, y = hidden_states_371_cast_fp16)[name = string("hidden_states_373_cast_fp16")]; + int32 var_17613 = const()[name = string("op_17613"), val = int32(-1)]; + fp16 const_990_promoted_to_fp16 = const()[name = string("const_990_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17615_cast_fp16 = mul(x = hidden_states_373_cast_fp16, y = const_990_promoted_to_fp16)[name = string("op_17615_cast_fp16")]; + bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; + tensor input_461_cast_fp16 = concat(axis = var_17613, interleave = input_461_interleave_0, values = (hidden_states_373_cast_fp16, var_17615_cast_fp16))[name = string("input_461_cast_fp16")]; + tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; + fp16 var_17610_to_fp16 = const()[name = string("op_17610_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_17610_to_fp16, x = input_461_cast_fp16)[name = string("normed_553_cast_fp16")]; + tensor normed_555_begin_0 = const()[name = string("normed_555_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_555_end_0 = const()[name = string("normed_555_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_555_end_mask_0 = const()[name = string("normed_555_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_555_cast_fp16 = slice_by_index(begin = normed_555_begin_0, end = normed_555_end_0, end_mask = normed_555_end_mask_0, x = normed_553_cast_fp16)[name = string("normed_555_cast_fp16")]; + tensor var_17629_to_fp16 = const()[name = string("op_17629_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832655680)))]; + tensor hidden_states_375_cast_fp16 = mul(x = normed_555_cast_fp16, y = var_17629_to_fp16)[name = string("hidden_states_375_cast_fp16")]; + tensor var_17640 = const()[name = string("op_17640"), val = tensor([0, 2, 1])]; + tensor var_17643_axes_0 = const()[name = string("op_17643_axes_0"), val = tensor([2])]; + tensor var_17641_cast_fp16 = transpose(perm = var_17640, x = hidden_states_375_cast_fp16)[name = string("transpose_26")]; + tensor var_17643_cast_fp16 = expand_dims(axes = var_17643_axes_0, x = var_17641_cast_fp16)[name = string("op_17643_cast_fp16")]; + string query_states_185_pad_type_0 = const()[name = string("query_states_185_pad_type_0"), val = string("valid")]; + tensor query_states_185_strides_0 = const()[name = string("query_states_185_strides_0"), val = tensor([1, 1])]; + tensor query_states_185_pad_0 = const()[name = string("query_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_185_dilations_0 = const()[name = string("query_states_185_dilations_0"), val = tensor([1, 1])]; + int32 query_states_185_groups_0 = const()[name = string("query_states_185_groups_0"), val = int32(1)]; + tensor query_states_185 = conv(dilations = query_states_185_dilations_0, groups = query_states_185_groups_0, pad = query_states_185_pad_0, pad_type = query_states_185_pad_type_0, strides = query_states_185_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_17643_cast_fp16)[name = string("query_states_185")]; + string key_states_231_pad_type_0 = const()[name = string("key_states_231_pad_type_0"), val = string("valid")]; + tensor key_states_231_strides_0 = const()[name = string("key_states_231_strides_0"), val = tensor([1, 1])]; + tensor key_states_231_pad_0 = const()[name = string("key_states_231_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_231_dilations_0 = const()[name = string("key_states_231_dilations_0"), val = tensor([1, 1])]; + int32 key_states_231_groups_0 = const()[name = string("key_states_231_groups_0"), val = int32(1)]; + tensor key_states_231 = conv(dilations = key_states_231_dilations_0, groups = key_states_231_groups_0, pad = key_states_231_pad_0, pad_type = key_states_231_pad_type_0, strides = key_states_231_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_17643_cast_fp16)[name = string("key_states_231")]; + string value_states_185_pad_type_0 = const()[name = string("value_states_185_pad_type_0"), val = string("valid")]; + tensor value_states_185_strides_0 = const()[name = string("value_states_185_strides_0"), val = tensor([1, 1])]; + tensor value_states_185_pad_0 = const()[name = string("value_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_185_dilations_0 = const()[name = string("value_states_185_dilations_0"), val = tensor([1, 1])]; + int32 value_states_185_groups_0 = const()[name = string("value_states_185_groups_0"), val = int32(1)]; + tensor value_states_185 = conv(dilations = value_states_185_dilations_0, groups = value_states_185_groups_0, pad = value_states_185_pad_0, pad_type = value_states_185_pad_type_0, strides = value_states_185_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_17643_cast_fp16)[name = string("value_states_185")]; + tensor var_17685 = const()[name = string("op_17685"), val = tensor([1, 4, 256, 64])]; + tensor var_17686 = reshape(shape = var_17685, x = query_states_185)[name = string("op_17686")]; + tensor var_17691 = const()[name = string("op_17691"), val = tensor([0, 1, 3, 2])]; + tensor var_17696 = const()[name = string("op_17696"), val = tensor([1, 1, 256, 64])]; + tensor var_17697 = reshape(shape = var_17696, x = key_states_231)[name = string("op_17697")]; + tensor var_17702 = const()[name = string("op_17702"), val = tensor([0, 1, 3, 2])]; + tensor var_17707 = const()[name = string("op_17707"), val = tensor([1, 1, 256, 64])]; + tensor var_17708 = reshape(shape = var_17707, x = value_states_185)[name = string("op_17708")]; + tensor var_17713 = const()[name = string("op_17713"), val = tensor([0, 1, 3, 2])]; + int32 var_17724 = const()[name = string("op_17724"), val = int32(-1)]; + fp16 const_995_promoted = const()[name = string("const_995_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_377 = transpose(perm = var_17691, x = var_17686)[name = string("transpose_25")]; + tensor var_17726 = mul(x = hidden_states_377, y = const_995_promoted)[name = string("op_17726")]; + bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; + tensor input_465 = concat(axis = var_17724, interleave = input_465_interleave_0, values = (hidden_states_377, var_17726))[name = string("input_465")]; + tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; + fp16 var_17721_to_fp16 = const()[name = string("op_17721_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_17721_to_fp16, x = input_465)[name = string("normed_557_cast_fp16")]; + tensor normed_559_begin_0 = const()[name = string("normed_559_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_559_end_0 = const()[name = string("normed_559_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_559_end_mask_0 = const()[name = string("normed_559_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_559 = slice_by_index(begin = normed_559_begin_0, end = normed_559_end_0, end_mask = normed_559_end_mask_0, x = normed_557_cast_fp16)[name = string("normed_559")]; + tensor var_17740_to_fp16 = const()[name = string("op_17740_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832658048)))]; + tensor q_47_cast_fp16 = mul(x = normed_559, y = var_17740_to_fp16)[name = string("q_47_cast_fp16")]; + int32 var_17751 = const()[name = string("op_17751"), val = int32(-1)]; + fp16 const_999_promoted = const()[name = string("const_999_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_379 = transpose(perm = var_17702, x = var_17697)[name = string("transpose_24")]; + tensor var_17753 = mul(x = hidden_states_379, y = const_999_promoted)[name = string("op_17753")]; + bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; + tensor input_467 = concat(axis = var_17751, interleave = input_467_interleave_0, values = (hidden_states_379, var_17753))[name = string("input_467")]; + tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; + fp16 var_17748_to_fp16 = const()[name = string("op_17748_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_17748_to_fp16, x = input_467)[name = string("normed_561_cast_fp16")]; + tensor normed_563_begin_0 = const()[name = string("normed_563_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_563_end_0 = const()[name = string("normed_563_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_563_end_mask_0 = const()[name = string("normed_563_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_563 = slice_by_index(begin = normed_563_begin_0, end = normed_563_end_0, end_mask = normed_563_end_mask_0, x = normed_561_cast_fp16)[name = string("normed_563")]; + tensor var_17767_to_fp16 = const()[name = string("op_17767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832658624)))]; + tensor k_47_cast_fp16 = mul(x = normed_563, y = var_17767_to_fp16)[name = string("k_47_cast_fp16")]; + tensor var_17781_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_35)[name = string("op_17781_cast_fp16")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; + fp16 const_1005_promoted_to_fp16 = const()[name = string("const_1005_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17802_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_1005_promoted_to_fp16)[name = string("op_17802_cast_fp16")]; + int32 var_17804 = const()[name = string("op_17804"), val = int32(-1)]; + bool var_17805_interleave_0 = const()[name = string("op_17805_interleave_0"), val = bool(false)]; + tensor var_17805_cast_fp16 = concat(axis = var_17804, interleave = var_17805_interleave_0, values = (var_17802_cast_fp16, x1_93_cast_fp16))[name = string("op_17805_cast_fp16")]; + tensor var_17806_cast_fp16 = mul(x = var_17805_cast_fp16, y = sin_35)[name = string("op_17806_cast_fp16")]; + tensor query_states_187_cast_fp16 = add(x = var_17781_cast_fp16, y = var_17806_cast_fp16)[name = string("query_states_187_cast_fp16")]; + tensor var_17809_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_35)[name = string("op_17809_cast_fp16")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; + fp16 const_1008_promoted_to_fp16 = const()[name = string("const_1008_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17830_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_1008_promoted_to_fp16)[name = string("op_17830_cast_fp16")]; + int32 var_17832 = const()[name = string("op_17832"), val = int32(-1)]; + bool var_17833_interleave_0 = const()[name = string("op_17833_interleave_0"), val = bool(false)]; + tensor var_17833_cast_fp16 = concat(axis = var_17832, interleave = var_17833_interleave_0, values = (var_17830_cast_fp16, x1_95_cast_fp16))[name = string("op_17833_cast_fp16")]; + tensor var_17834_cast_fp16 = mul(x = var_17833_cast_fp16, y = sin_35)[name = string("op_17834_cast_fp16")]; + tensor key_states_233_cast_fp16 = add(x = var_17809_cast_fp16, y = var_17834_cast_fp16)[name = string("key_states_233_cast_fp16")]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_7_stride_0, update = key_states_233_cast_fp16, x = coreml_update_state_87)[name = string("model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_150_write_state")]; + tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_150")]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_187 = transpose(perm = var_17713, x = var_17708)[name = string("transpose_23")]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_8_stride_0, update = value_states_187, x = coreml_update_state_98)[name = string("model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_151_write_state")]; + tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_151")]; + tensor var_17933_begin_0 = const()[name = string("op_17933_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_17933_end_0 = const()[name = string("op_17933_end_0"), val = tensor([4, 1, 4096, 256])]; + tensor var_17933_end_mask_0 = const()[name = string("op_17933_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_17933_cast_fp16 = slice_by_index(begin = var_17933_begin_0, end = var_17933_end_0, end_mask = var_17933_end_mask_0, x = coreml_update_state_99)[name = string("op_17933_cast_fp16")]; + tensor var_17940_begin_0 = const()[name = string("op_17940_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_17940_end_0 = const()[name = string("op_17940_end_0"), val = tensor([1, 1, 4096, 256])]; + tensor var_17940_end_mask_0 = const()[name = string("op_17940_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_17940_cast_fp16 = slice_by_index(begin = var_17940_begin_0, end = var_17940_end_0, end_mask = var_17940_end_mask_0, x = coreml_update_state_99)[name = string("op_17940_cast_fp16")]; + tensor var_17979 = const()[name = string("op_17979"), val = tensor([1, 4, 1, 1])]; + tensor x_373_cast_fp16 = tile(reps = var_17979, x = var_17933_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_17999 = const()[name = string("op_17999"), val = tensor([1, 4, 1, 1])]; + tensor x_379_cast_fp16 = tile(reps = var_17999, x = var_17940_cast_fp16)[name = string("x_379_cast_fp16")]; + bool var_18026_transpose_x_0 = const()[name = string("op_18026_transpose_x_0"), val = bool(false)]; + bool var_18026_transpose_y_0 = const()[name = string("op_18026_transpose_y_0"), val = bool(true)]; + tensor var_18026 = matmul(transpose_x = var_18026_transpose_x_0, transpose_y = var_18026_transpose_y_0, x = query_states_187_cast_fp16, y = x_373_cast_fp16)[name = string("op_18026")]; + fp16 var_18027_to_fp16 = const()[name = string("op_18027_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_93_cast_fp16 = mul(x = var_18026, y = var_18027_to_fp16)[name = string("attn_weights_93_cast_fp16")]; + tensor attn_weights_95_cast_fp16 = add(x = attn_weights_93_cast_fp16, y = causal_mask)[name = string("attn_weights_95_cast_fp16")]; + int32 var_18062 = const()[name = string("op_18062"), val = int32(-1)]; + tensor var_18064_cast_fp16 = softmax(axis = var_18062, x = attn_weights_95_cast_fp16)[name = string("op_18064_cast_fp16")]; + tensor concat_426 = const()[name = string("concat_426"), val = tensor([4, 64, 4096])]; + tensor reshape_69_cast_fp16 = reshape(shape = concat_426, x = var_18064_cast_fp16)[name = string("reshape_69_cast_fp16")]; + tensor concat_427 = const()[name = string("concat_427"), val = tensor([4, 4096, 256])]; + tensor reshape_70_cast_fp16 = reshape(shape = concat_427, x = x_379_cast_fp16)[name = string("reshape_70_cast_fp16")]; + bool matmul_23_transpose_x_0 = const()[name = string("matmul_23_transpose_x_0"), val = bool(false)]; + bool matmul_23_transpose_y_0 = const()[name = string("matmul_23_transpose_y_0"), val = bool(false)]; + tensor matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_0, transpose_y = matmul_23_transpose_y_0, x = reshape_69_cast_fp16, y = reshape_70_cast_fp16)[name = string("matmul_23_cast_fp16")]; + tensor concat_431 = const()[name = string("concat_431"), val = tensor([1, 4, 64, 256])]; + tensor reshape_71_cast_fp16 = reshape(shape = concat_431, x = matmul_23_cast_fp16)[name = string("reshape_71_cast_fp16")]; + tensor var_18076_perm_0 = const()[name = string("op_18076_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_18095 = const()[name = string("op_18095"), val = tensor([1, 64, 1024])]; + tensor var_18076_cast_fp16 = transpose(perm = var_18076_perm_0, x = reshape_71_cast_fp16)[name = string("transpose_22")]; + tensor attn_output_235_cast_fp16 = reshape(shape = var_18095, x = var_18076_cast_fp16)[name = string("attn_output_235_cast_fp16")]; + tensor var_18100 = const()[name = string("op_18100"), val = tensor([0, 2, 1])]; + string var_18116_pad_type_0 = const()[name = string("op_18116_pad_type_0"), val = string("valid")]; + int32 var_18116_groups_0 = const()[name = string("op_18116_groups_0"), val = int32(1)]; + tensor var_18116_strides_0 = const()[name = string("op_18116_strides_0"), val = tensor([1])]; + tensor var_18116_pad_0 = const()[name = string("op_18116_pad_0"), val = tensor([0, 0])]; + tensor var_18116_dilations_0 = const()[name = string("op_18116_dilations_0"), val = tensor([1])]; + tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832659200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833544000))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_18101_cast_fp16 = transpose(perm = var_18100, x = attn_output_235_cast_fp16)[name = string("transpose_21")]; + tensor var_18116_cast_fp16 = conv(dilations = var_18116_dilations_0, groups = var_18116_groups_0, pad = var_18116_pad_0, pad_type = var_18116_pad_type_0, strides = var_18116_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_18101_cast_fp16)[name = string("op_18116_cast_fp16")]; + tensor var_18120 = const()[name = string("op_18120"), val = tensor([0, 2, 1])]; + int32 var_18131 = const()[name = string("op_18131"), val = int32(-1)]; + fp16 const_1020_promoted_to_fp16 = const()[name = string("const_1020_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_381_cast_fp16 = transpose(perm = var_18120, x = var_18116_cast_fp16)[name = string("transpose_20")]; + tensor var_18133_cast_fp16 = mul(x = hidden_states_381_cast_fp16, y = const_1020_promoted_to_fp16)[name = string("op_18133_cast_fp16")]; + bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; + tensor input_471_cast_fp16 = concat(axis = var_18131, interleave = input_471_interleave_0, values = (hidden_states_381_cast_fp16, var_18133_cast_fp16))[name = string("input_471_cast_fp16")]; + tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; + fp16 var_18128_to_fp16 = const()[name = string("op_18128_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_18128_to_fp16, x = input_471_cast_fp16)[name = string("normed_565_cast_fp16")]; + tensor normed_567_begin_0 = const()[name = string("normed_567_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_567_end_0 = const()[name = string("normed_567_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_567_end_mask_0 = const()[name = string("normed_567_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_567_cast_fp16 = slice_by_index(begin = normed_567_begin_0, end = normed_567_end_0, end_mask = normed_567_end_mask_0, x = normed_565_cast_fp16)[name = string("normed_567_cast_fp16")]; + tensor var_18147_to_fp16 = const()[name = string("op_18147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833580928)))]; + tensor attn_output_239_cast_fp16 = mul(x = normed_567_cast_fp16, y = var_18147_to_fp16)[name = string("attn_output_239_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = add(x = hidden_states_373_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + int32 var_18160 = const()[name = string("op_18160"), val = int32(-1)]; + fp16 const_1024_promoted_to_fp16 = const()[name = string("const_1024_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18162_cast_fp16 = mul(x = hidden_states_383_cast_fp16, y = const_1024_promoted_to_fp16)[name = string("op_18162_cast_fp16")]; + bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; + tensor input_473_cast_fp16 = concat(axis = var_18160, interleave = input_473_interleave_0, values = (hidden_states_383_cast_fp16, var_18162_cast_fp16))[name = string("input_473_cast_fp16")]; + tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; + fp16 var_18157_to_fp16 = const()[name = string("op_18157_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_18157_to_fp16, x = input_473_cast_fp16)[name = string("normed_569_cast_fp16")]; + tensor normed_571_begin_0 = const()[name = string("normed_571_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_571_end_0 = const()[name = string("normed_571_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_571_end_mask_0 = const()[name = string("normed_571_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_571_cast_fp16 = slice_by_index(begin = normed_571_begin_0, end = normed_571_end_0, end_mask = normed_571_end_mask_0, x = normed_569_cast_fp16)[name = string("normed_571_cast_fp16")]; + tensor var_18176_to_fp16 = const()[name = string("op_18176_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833583296)))]; + tensor x_381_cast_fp16 = mul(x = normed_571_cast_fp16, y = var_18176_to_fp16)[name = string("x_381_cast_fp16")]; + tensor var_18188 = const()[name = string("op_18188"), val = tensor([0, 2, 1])]; + tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; + tensor var_18189_cast_fp16 = transpose(perm = var_18188, x = x_381_cast_fp16)[name = string("transpose_19")]; + tensor input_475_cast_fp16 = expand_dims(axes = input_475_axes_0, x = var_18189_cast_fp16)[name = string("input_475_cast_fp16")]; + string x_383_pad_type_0 = const()[name = string("x_383_pad_type_0"), val = string("valid")]; + tensor x_383_strides_0 = const()[name = string("x_383_strides_0"), val = tensor([1, 1])]; + tensor x_383_pad_0 = const()[name = string("x_383_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_383_dilations_0 = const()[name = string("x_383_dilations_0"), val = tensor([1, 1])]; + int32 x_383_groups_0 = const()[name = string("x_383_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833585664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839557696))))[name = string("model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_383_cast_fp16 = conv(dilations = x_383_dilations_0, groups = x_383_groups_0, pad = x_383_pad_0, pad_type = x_383_pad_type_0, strides = x_383_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("x_383_cast_fp16")]; + string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; + tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; + tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; + int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839778944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845750976))))[name = string("model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_47_cast_fp16 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("b_47_cast_fp16")]; + string var_18214_mode_0 = const()[name = string("op_18214_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_18214_cast_fp16 = gelu(mode = var_18214_mode_0, x = x_383_cast_fp16)[name = string("op_18214_cast_fp16")]; + tensor input_477_cast_fp16 = mul(x = var_18214_cast_fp16, y = b_47_cast_fp16)[name = string("input_477_cast_fp16")]; + string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; + tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; + tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; + int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845972224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851944256))))[name = string("model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_47_cast_fp16 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_477_cast_fp16)[name = string("e_47_cast_fp16")]; + tensor var_18222_axes_0 = const()[name = string("op_18222_axes_0"), val = tensor([2])]; + tensor var_18222_cast_fp16 = squeeze(axes = var_18222_axes_0, x = e_47_cast_fp16)[name = string("op_18222_cast_fp16")]; + tensor var_18223 = const()[name = string("op_18223"), val = tensor([0, 2, 1])]; + int32 var_18234 = const()[name = string("op_18234"), val = int32(-1)]; + fp16 const_1028_promoted_to_fp16 = const()[name = string("const_1028_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_385_cast_fp16 = transpose(perm = var_18223, x = var_18222_cast_fp16)[name = string("transpose_18")]; + tensor var_18236_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = const_1028_promoted_to_fp16)[name = string("op_18236_cast_fp16")]; + bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; + tensor input_479_cast_fp16 = concat(axis = var_18234, interleave = input_479_interleave_0, values = (hidden_states_385_cast_fp16, var_18236_cast_fp16))[name = string("input_479_cast_fp16")]; + tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; + fp16 var_18231_to_fp16 = const()[name = string("op_18231_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_18231_to_fp16, x = input_479_cast_fp16)[name = string("normed_573_cast_fp16")]; + tensor normed_575_begin_0 = const()[name = string("normed_575_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_575_end_0 = const()[name = string("normed_575_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_575_end_mask_0 = const()[name = string("normed_575_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_575_cast_fp16 = slice_by_index(begin = normed_575_begin_0, end = normed_575_end_0, end_mask = normed_575_end_mask_0, x = normed_573_cast_fp16)[name = string("normed_575_cast_fp16")]; + tensor var_18250_to_fp16 = const()[name = string("op_18250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851981184)))]; + tensor hidden_states_387_cast_fp16 = mul(x = normed_575_cast_fp16, y = var_18250_to_fp16)[name = string("hidden_states_387_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_383_cast_fp16, y = hidden_states_387_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + int32 var_18304 = const()[name = string("op_18304"), val = int32(-1)]; + fp16 const_1033_promoted_to_fp16 = const()[name = string("const_1033_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18306_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = const_1033_promoted_to_fp16)[name = string("op_18306_cast_fp16")]; + bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; + tensor input_481_cast_fp16 = concat(axis = var_18304, interleave = input_481_interleave_0, values = (hidden_states_389_cast_fp16, var_18306_cast_fp16))[name = string("input_481_cast_fp16")]; + tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; + fp16 var_18301_to_fp16 = const()[name = string("op_18301_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_18301_to_fp16, x = input_481_cast_fp16)[name = string("normed_577_cast_fp16")]; + tensor normed_579_begin_0 = const()[name = string("normed_579_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_579_end_0 = const()[name = string("normed_579_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_579_end_mask_0 = const()[name = string("normed_579_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_579_cast_fp16 = slice_by_index(begin = normed_579_begin_0, end = normed_579_end_0, end_mask = normed_579_end_mask_0, x = normed_577_cast_fp16)[name = string("normed_579_cast_fp16")]; + tensor var_18320_to_fp16 = const()[name = string("op_18320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851983552)))]; + tensor hidden_states_391_cast_fp16 = mul(x = normed_579_cast_fp16, y = var_18320_to_fp16)[name = string("hidden_states_391_cast_fp16")]; + tensor var_18331 = const()[name = string("op_18331"), val = tensor([0, 2, 1])]; + tensor var_18334_axes_0 = const()[name = string("op_18334_axes_0"), val = tensor([2])]; + tensor var_18332_cast_fp16 = transpose(perm = var_18331, x = hidden_states_391_cast_fp16)[name = string("transpose_17")]; + tensor var_18334_cast_fp16 = expand_dims(axes = var_18334_axes_0, x = var_18332_cast_fp16)[name = string("op_18334_cast_fp16")]; + string query_states_193_pad_type_0 = const()[name = string("query_states_193_pad_type_0"), val = string("valid")]; + tensor query_states_193_strides_0 = const()[name = string("query_states_193_strides_0"), val = tensor([1, 1])]; + tensor query_states_193_pad_0 = const()[name = string("query_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_193_dilations_0 = const()[name = string("query_states_193_dilations_0"), val = tensor([1, 1])]; + int32 query_states_193_groups_0 = const()[name = string("query_states_193_groups_0"), val = int32(1)]; + tensor query_states_193 = conv(dilations = query_states_193_dilations_0, groups = query_states_193_groups_0, pad = query_states_193_pad_0, pad_type = query_states_193_pad_type_0, strides = query_states_193_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_18334_cast_fp16)[name = string("query_states_193")]; + string key_states_241_pad_type_0 = const()[name = string("key_states_241_pad_type_0"), val = string("valid")]; + tensor key_states_241_strides_0 = const()[name = string("key_states_241_strides_0"), val = tensor([1, 1])]; + tensor key_states_241_pad_0 = const()[name = string("key_states_241_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_241_dilations_0 = const()[name = string("key_states_241_dilations_0"), val = tensor([1, 1])]; + int32 key_states_241_groups_0 = const()[name = string("key_states_241_groups_0"), val = int32(1)]; + tensor key_states_241 = conv(dilations = key_states_241_dilations_0, groups = key_states_241_groups_0, pad = key_states_241_pad_0, pad_type = key_states_241_pad_type_0, strides = key_states_241_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_18334_cast_fp16)[name = string("key_states_241")]; + string value_states_193_pad_type_0 = const()[name = string("value_states_193_pad_type_0"), val = string("valid")]; + tensor value_states_193_strides_0 = const()[name = string("value_states_193_strides_0"), val = tensor([1, 1])]; + tensor value_states_193_pad_0 = const()[name = string("value_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_193_dilations_0 = const()[name = string("value_states_193_dilations_0"), val = tensor([1, 1])]; + int32 value_states_193_groups_0 = const()[name = string("value_states_193_groups_0"), val = int32(1)]; + tensor value_states_193 = conv(dilations = value_states_193_dilations_0, groups = value_states_193_groups_0, pad = value_states_193_pad_0, pad_type = value_states_193_pad_type_0, strides = value_states_193_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_18334_cast_fp16)[name = string("value_states_193")]; + tensor var_18376 = const()[name = string("op_18376"), val = tensor([1, 4, 256, 64])]; + tensor var_18377 = reshape(shape = var_18376, x = query_states_193)[name = string("op_18377")]; + tensor var_18382 = const()[name = string("op_18382"), val = tensor([0, 1, 3, 2])]; + tensor var_18387 = const()[name = string("op_18387"), val = tensor([1, 1, 256, 64])]; + tensor var_18388 = reshape(shape = var_18387, x = key_states_241)[name = string("op_18388")]; + tensor var_18393 = const()[name = string("op_18393"), val = tensor([0, 1, 3, 2])]; + tensor var_18398 = const()[name = string("op_18398"), val = tensor([1, 1, 256, 64])]; + tensor var_18399 = reshape(shape = var_18398, x = value_states_193)[name = string("op_18399")]; + tensor var_18404 = const()[name = string("op_18404"), val = tensor([0, 1, 3, 2])]; + int32 var_18415 = const()[name = string("op_18415"), val = int32(-1)]; + fp16 const_1038_promoted = const()[name = string("const_1038_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_393 = transpose(perm = var_18382, x = var_18377)[name = string("transpose_16")]; + tensor var_18417 = mul(x = hidden_states_393, y = const_1038_promoted)[name = string("op_18417")]; + bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; + tensor input_485 = concat(axis = var_18415, interleave = input_485_interleave_0, values = (hidden_states_393, var_18417))[name = string("input_485")]; + tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; + fp16 var_18412_to_fp16 = const()[name = string("op_18412_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_18412_to_fp16, x = input_485)[name = string("normed_581_cast_fp16")]; + tensor normed_583_begin_0 = const()[name = string("normed_583_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_583_end_0 = const()[name = string("normed_583_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_583_end_mask_0 = const()[name = string("normed_583_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_583 = slice_by_index(begin = normed_583_begin_0, end = normed_583_end_0, end_mask = normed_583_end_mask_0, x = normed_581_cast_fp16)[name = string("normed_583")]; + tensor var_18431_to_fp16 = const()[name = string("op_18431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851985920)))]; + tensor q_49_cast_fp16 = mul(x = normed_583, y = var_18431_to_fp16)[name = string("q_49_cast_fp16")]; + int32 var_18442 = const()[name = string("op_18442"), val = int32(-1)]; + fp16 const_1042_promoted = const()[name = string("const_1042_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_395 = transpose(perm = var_18393, x = var_18388)[name = string("transpose_15")]; + tensor var_18444 = mul(x = hidden_states_395, y = const_1042_promoted)[name = string("op_18444")]; + bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; + tensor input_487 = concat(axis = var_18442, interleave = input_487_interleave_0, values = (hidden_states_395, var_18444))[name = string("input_487")]; + tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; + fp16 var_18439_to_fp16 = const()[name = string("op_18439_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_18439_to_fp16, x = input_487)[name = string("normed_585_cast_fp16")]; + tensor normed_587_begin_0 = const()[name = string("normed_587_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_587_end_0 = const()[name = string("normed_587_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_587_end_mask_0 = const()[name = string("normed_587_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_587 = slice_by_index(begin = normed_587_begin_0, end = normed_587_end_0, end_mask = normed_587_end_mask_0, x = normed_585_cast_fp16)[name = string("normed_587")]; + tensor var_18458_to_fp16 = const()[name = string("op_18458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851986496)))]; + tensor k_49_cast_fp16 = mul(x = normed_587, y = var_18458_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_18472_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_5)[name = string("op_18472_cast_fp16")]; + tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; + fp16 const_1048_promoted_to_fp16 = const()[name = string("const_1048_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18493_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_1048_promoted_to_fp16)[name = string("op_18493_cast_fp16")]; + int32 var_18495 = const()[name = string("op_18495"), val = int32(-1)]; + bool var_18496_interleave_0 = const()[name = string("op_18496_interleave_0"), val = bool(false)]; + tensor var_18496_cast_fp16 = concat(axis = var_18495, interleave = var_18496_interleave_0, values = (var_18493_cast_fp16, x1_97_cast_fp16))[name = string("op_18496_cast_fp16")]; + tensor var_18497_cast_fp16 = mul(x = var_18496_cast_fp16, y = sin_5)[name = string("op_18497_cast_fp16")]; + tensor query_states_195_cast_fp16 = add(x = var_18472_cast_fp16, y = var_18497_cast_fp16)[name = string("query_states_195_cast_fp16")]; + tensor var_18500_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_5)[name = string("op_18500_cast_fp16")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; + fp16 const_1051_promoted_to_fp16 = const()[name = string("const_1051_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18521_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_1051_promoted_to_fp16)[name = string("op_18521_cast_fp16")]; + int32 var_18523 = const()[name = string("op_18523"), val = int32(-1)]; + bool var_18524_interleave_0 = const()[name = string("op_18524_interleave_0"), val = bool(false)]; + tensor var_18524_cast_fp16 = concat(axis = var_18523, interleave = var_18524_interleave_0, values = (var_18521_cast_fp16, x1_99_cast_fp16))[name = string("op_18524_cast_fp16")]; + tensor var_18525_cast_fp16 = mul(x = var_18524_cast_fp16, y = sin_5)[name = string("op_18525_cast_fp16")]; + tensor key_states_243_cast_fp16 = add(x = var_18500_cast_fp16, y = var_18525_cast_fp16)[name = string("key_states_243_cast_fp16")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([20])]; + tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; + tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; + tensor expand_dims_292 = const()[name = string("expand_dims_292"), val = tensor([21])]; + int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; + bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; + tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (expand_dims_288, expand_dims_289, current_pos, expand_dims_291))[name = string("concat_434")]; + tensor concat_435_values1_0 = const()[name = string("concat_435_values1_0"), val = tensor([0])]; + tensor concat_435_values3_0 = const()[name = string("concat_435_values3_0"), val = tensor([0])]; + int32 concat_435_axis_0 = const()[name = string("concat_435_axis_0"), val = int32(0)]; + bool concat_435_interleave_0 = const()[name = string("concat_435_interleave_0"), val = bool(false)]; + tensor concat_435 = concat(axis = concat_435_axis_0, interleave = concat_435_interleave_0, values = (expand_dims_292, concat_435_values1_0, end_pos_1, concat_435_values3_0))[name = string("concat_435")]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_434, begin_mask = model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0, end = concat_435, end_mask = model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_41_stride_0, update = key_states_243_cast_fp16, x = coreml_update_state_97)[name = string("model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_152_write_state")]; + tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_152")]; + tensor expand_dims_294 = const()[name = string("expand_dims_294"), val = tensor([42])]; + tensor expand_dims_295 = const()[name = string("expand_dims_295"), val = tensor([0])]; + tensor expand_dims_297 = const()[name = string("expand_dims_297"), val = tensor([0])]; + tensor expand_dims_298 = const()[name = string("expand_dims_298"), val = tensor([43])]; + int32 concat_438_axis_0 = const()[name = string("concat_438_axis_0"), val = int32(0)]; + bool concat_438_interleave_0 = const()[name = string("concat_438_interleave_0"), val = bool(false)]; + tensor concat_438 = concat(axis = concat_438_axis_0, interleave = concat_438_interleave_0, values = (expand_dims_294, expand_dims_295, current_pos, expand_dims_297))[name = string("concat_438")]; + tensor concat_439_values1_0 = const()[name = string("concat_439_values1_0"), val = tensor([0])]; + tensor concat_439_values3_0 = const()[name = string("concat_439_values3_0"), val = tensor([0])]; + int32 concat_439_axis_0 = const()[name = string("concat_439_axis_0"), val = int32(0)]; + bool concat_439_interleave_0 = const()[name = string("concat_439_interleave_0"), val = bool(false)]; + tensor concat_439 = concat(axis = concat_439_axis_0, interleave = concat_439_interleave_0, values = (expand_dims_298, concat_439_values1_0, end_pos_1, concat_439_values3_0))[name = string("concat_439")]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_195 = transpose(perm = var_18404, x = var_18399)[name = string("transpose_14")]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_438, begin_mask = model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0, end = concat_439, end_mask = model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_42_stride_0, update = value_states_195, x = coreml_update_state_100)[name = string("model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_153_write_state")]; + tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_153")]; + tensor var_18624_begin_0 = const()[name = string("op_18624_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_18624_end_0 = const()[name = string("op_18624_end_0"), val = tensor([21, 1, 512, 256])]; + tensor var_18624_end_mask_0 = const()[name = string("op_18624_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_18624_cast_fp16 = slice_by_index(begin = var_18624_begin_0, end = var_18624_end_0, end_mask = var_18624_end_mask_0, x = coreml_update_state_101)[name = string("op_18624_cast_fp16")]; + tensor var_18631_begin_0 = const()[name = string("op_18631_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor var_18631_end_0 = const()[name = string("op_18631_end_0"), val = tensor([43, 1, 512, 256])]; + tensor var_18631_end_mask_0 = const()[name = string("op_18631_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_18631_cast_fp16 = slice_by_index(begin = var_18631_begin_0, end = var_18631_end_0, end_mask = var_18631_end_mask_0, x = coreml_update_state_101)[name = string("op_18631_cast_fp16")]; + tensor var_18670 = const()[name = string("op_18670"), val = tensor([1, 4, 1, 1])]; + tensor x_389_cast_fp16 = tile(reps = var_18670, x = var_18624_cast_fp16)[name = string("x_389_cast_fp16")]; + tensor var_18690 = const()[name = string("op_18690"), val = tensor([1, 4, 1, 1])]; + tensor x_395_cast_fp16 = tile(reps = var_18690, x = var_18631_cast_fp16)[name = string("x_395_cast_fp16")]; + bool var_18717_transpose_x_0 = const()[name = string("op_18717_transpose_x_0"), val = bool(false)]; + bool var_18717_transpose_y_0 = const()[name = string("op_18717_transpose_y_0"), val = bool(true)]; + tensor var_18717 = matmul(transpose_x = var_18717_transpose_x_0, transpose_y = var_18717_transpose_y_0, x = query_states_195_cast_fp16, y = x_389_cast_fp16)[name = string("op_18717")]; + fp16 var_18718_to_fp16 = const()[name = string("op_18718_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_97_cast_fp16 = mul(x = var_18717, y = var_18718_to_fp16)[name = string("attn_weights_97_cast_fp16")]; + tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = mask_slice_1)[name = string("attn_weights_99_cast_fp16")]; + int32 var_18753 = const()[name = string("op_18753"), val = int32(-1)]; + tensor var_18755_cast_fp16 = softmax(axis = var_18753, x = attn_weights_99_cast_fp16)[name = string("op_18755_cast_fp16")]; + tensor concat_444 = const()[name = string("concat_444"), val = tensor([4, 64, 512])]; + tensor reshape_72_cast_fp16 = reshape(shape = concat_444, x = var_18755_cast_fp16)[name = string("reshape_72_cast_fp16")]; + tensor concat_445 = const()[name = string("concat_445"), val = tensor([4, 512, 256])]; + tensor reshape_73_cast_fp16 = reshape(shape = concat_445, x = x_395_cast_fp16)[name = string("reshape_73_cast_fp16")]; + bool matmul_24_transpose_x_0 = const()[name = string("matmul_24_transpose_x_0"), val = bool(false)]; + bool matmul_24_transpose_y_0 = const()[name = string("matmul_24_transpose_y_0"), val = bool(false)]; + tensor matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_0, transpose_y = matmul_24_transpose_y_0, x = reshape_72_cast_fp16, y = reshape_73_cast_fp16)[name = string("matmul_24_cast_fp16")]; + tensor concat_449 = const()[name = string("concat_449"), val = tensor([1, 4, 64, 256])]; + tensor reshape_74_cast_fp16 = reshape(shape = concat_449, x = matmul_24_cast_fp16)[name = string("reshape_74_cast_fp16")]; + tensor var_18767_perm_0 = const()[name = string("op_18767_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_18786 = const()[name = string("op_18786"), val = tensor([1, 64, 1024])]; + tensor var_18767_cast_fp16 = transpose(perm = var_18767_perm_0, x = reshape_74_cast_fp16)[name = string("transpose_13")]; + tensor attn_output_245_cast_fp16 = reshape(shape = var_18786, x = var_18767_cast_fp16)[name = string("attn_output_245_cast_fp16")]; + tensor var_18791 = const()[name = string("op_18791"), val = tensor([0, 2, 1])]; + string var_18807_pad_type_0 = const()[name = string("op_18807_pad_type_0"), val = string("valid")]; + int32 var_18807_groups_0 = const()[name = string("op_18807_groups_0"), val = int32(1)]; + tensor var_18807_strides_0 = const()[name = string("op_18807_strides_0"), val = tensor([1])]; + tensor var_18807_pad_0 = const()[name = string("op_18807_pad_0"), val = tensor([0, 0])]; + tensor var_18807_dilations_0 = const()[name = string("op_18807_dilations_0"), val = tensor([1])]; + tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851987072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852871872))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_18792_cast_fp16 = transpose(perm = var_18791, x = attn_output_245_cast_fp16)[name = string("transpose_12")]; + tensor var_18807_cast_fp16 = conv(dilations = var_18807_dilations_0, groups = var_18807_groups_0, pad = var_18807_pad_0, pad_type = var_18807_pad_type_0, strides = var_18807_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_18792_cast_fp16)[name = string("op_18807_cast_fp16")]; + tensor var_18811 = const()[name = string("op_18811"), val = tensor([0, 2, 1])]; + int32 var_18822 = const()[name = string("op_18822"), val = int32(-1)]; + fp16 const_1063_promoted_to_fp16 = const()[name = string("const_1063_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_397_cast_fp16 = transpose(perm = var_18811, x = var_18807_cast_fp16)[name = string("transpose_11")]; + tensor var_18824_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = const_1063_promoted_to_fp16)[name = string("op_18824_cast_fp16")]; + bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; + tensor input_491_cast_fp16 = concat(axis = var_18822, interleave = input_491_interleave_0, values = (hidden_states_397_cast_fp16, var_18824_cast_fp16))[name = string("input_491_cast_fp16")]; + tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; + fp16 var_18819_to_fp16 = const()[name = string("op_18819_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_18819_to_fp16, x = input_491_cast_fp16)[name = string("normed_589_cast_fp16")]; + tensor normed_591_begin_0 = const()[name = string("normed_591_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_591_end_0 = const()[name = string("normed_591_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_591_end_mask_0 = const()[name = string("normed_591_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_591_cast_fp16 = slice_by_index(begin = normed_591_begin_0, end = normed_591_end_0, end_mask = normed_591_end_mask_0, x = normed_589_cast_fp16)[name = string("normed_591_cast_fp16")]; + tensor var_18838_to_fp16 = const()[name = string("op_18838_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852908800)))]; + tensor attn_output_249_cast_fp16 = mul(x = normed_591_cast_fp16, y = var_18838_to_fp16)[name = string("attn_output_249_cast_fp16")]; + tensor hidden_states_399_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_399_cast_fp16")]; + int32 var_18851 = const()[name = string("op_18851"), val = int32(-1)]; + fp16 const_1067_promoted_to_fp16 = const()[name = string("const_1067_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18853_cast_fp16 = mul(x = hidden_states_399_cast_fp16, y = const_1067_promoted_to_fp16)[name = string("op_18853_cast_fp16")]; + bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; + tensor input_493_cast_fp16 = concat(axis = var_18851, interleave = input_493_interleave_0, values = (hidden_states_399_cast_fp16, var_18853_cast_fp16))[name = string("input_493_cast_fp16")]; + tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; + fp16 var_18848_to_fp16 = const()[name = string("op_18848_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_18848_to_fp16, x = input_493_cast_fp16)[name = string("normed_593_cast_fp16")]; + tensor normed_595_begin_0 = const()[name = string("normed_595_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_595_end_0 = const()[name = string("normed_595_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_595_end_mask_0 = const()[name = string("normed_595_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_595_cast_fp16 = slice_by_index(begin = normed_595_begin_0, end = normed_595_end_0, end_mask = normed_595_end_mask_0, x = normed_593_cast_fp16)[name = string("normed_595_cast_fp16")]; + tensor var_18867_to_fp16 = const()[name = string("op_18867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852911168)))]; + tensor x_397_cast_fp16 = mul(x = normed_595_cast_fp16, y = var_18867_to_fp16)[name = string("x_397_cast_fp16")]; + tensor var_18879 = const()[name = string("op_18879"), val = tensor([0, 2, 1])]; + tensor input_495_axes_0 = const()[name = string("input_495_axes_0"), val = tensor([2])]; + tensor var_18880_cast_fp16 = transpose(perm = var_18879, x = x_397_cast_fp16)[name = string("transpose_10")]; + tensor input_495_cast_fp16 = expand_dims(axes = input_495_axes_0, x = var_18880_cast_fp16)[name = string("input_495_cast_fp16")]; + string x_399_pad_type_0 = const()[name = string("x_399_pad_type_0"), val = string("valid")]; + tensor x_399_strides_0 = const()[name = string("x_399_strides_0"), val = tensor([1, 1])]; + tensor x_399_pad_0 = const()[name = string("x_399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_399_dilations_0 = const()[name = string("x_399_dilations_0"), val = tensor([1, 1])]; + int32 x_399_groups_0 = const()[name = string("x_399_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852913536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858885568))))[name = string("model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_399_cast_fp16 = conv(dilations = x_399_dilations_0, groups = x_399_groups_0, pad = x_399_pad_0, pad_type = x_399_pad_type_0, strides = x_399_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("x_399_cast_fp16")]; + string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; + tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; + tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; + int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859106816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865078848))))[name = string("model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_49_cast_fp16 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("b_49_cast_fp16")]; + string var_18905_mode_0 = const()[name = string("op_18905_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_18905_cast_fp16 = gelu(mode = var_18905_mode_0, x = x_399_cast_fp16)[name = string("op_18905_cast_fp16")]; + tensor input_497_cast_fp16 = mul(x = var_18905_cast_fp16, y = b_49_cast_fp16)[name = string("input_497_cast_fp16")]; + string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; + tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; + tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; + int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865300096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871272128))))[name = string("model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_49_cast_fp16 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_497_cast_fp16)[name = string("e_49_cast_fp16")]; + tensor var_18913_axes_0 = const()[name = string("op_18913_axes_0"), val = tensor([2])]; + tensor var_18913_cast_fp16 = squeeze(axes = var_18913_axes_0, x = e_49_cast_fp16)[name = string("op_18913_cast_fp16")]; + tensor var_18914 = const()[name = string("op_18914"), val = tensor([0, 2, 1])]; + int32 var_18925 = const()[name = string("op_18925"), val = int32(-1)]; + fp16 const_1071_promoted_to_fp16 = const()[name = string("const_1071_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_401_cast_fp16 = transpose(perm = var_18914, x = var_18913_cast_fp16)[name = string("transpose_9")]; + tensor var_18927_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = const_1071_promoted_to_fp16)[name = string("op_18927_cast_fp16")]; + bool input_499_interleave_0 = const()[name = string("input_499_interleave_0"), val = bool(false)]; + tensor input_499_cast_fp16 = concat(axis = var_18925, interleave = input_499_interleave_0, values = (hidden_states_401_cast_fp16, var_18927_cast_fp16))[name = string("input_499_cast_fp16")]; + tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; + fp16 var_18922_to_fp16 = const()[name = string("op_18922_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_18922_to_fp16, x = input_499_cast_fp16)[name = string("normed_597_cast_fp16")]; + tensor normed_599_begin_0 = const()[name = string("normed_599_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_599_end_0 = const()[name = string("normed_599_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_599_end_mask_0 = const()[name = string("normed_599_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_599_cast_fp16 = slice_by_index(begin = normed_599_begin_0, end = normed_599_end_0, end_mask = normed_599_end_mask_0, x = normed_597_cast_fp16)[name = string("normed_599_cast_fp16")]; + tensor var_18941_to_fp16 = const()[name = string("op_18941_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871309056)))]; + tensor hidden_states_403_cast_fp16 = mul(x = normed_599_cast_fp16, y = var_18941_to_fp16)[name = string("hidden_states_403_cast_fp16")]; + tensor hidden_states_405_cast_fp16 = add(x = hidden_states_399_cast_fp16, y = hidden_states_403_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; + int32 var_18995 = const()[name = string("op_18995"), val = int32(-1)]; + fp16 const_1076_promoted_to_fp16 = const()[name = string("const_1076_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18997_cast_fp16 = mul(x = hidden_states_405_cast_fp16, y = const_1076_promoted_to_fp16)[name = string("op_18997_cast_fp16")]; + bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; + tensor input_501_cast_fp16 = concat(axis = var_18995, interleave = input_501_interleave_0, values = (hidden_states_405_cast_fp16, var_18997_cast_fp16))[name = string("input_501_cast_fp16")]; + tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; + fp16 var_18992_to_fp16 = const()[name = string("op_18992_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_18992_to_fp16, x = input_501_cast_fp16)[name = string("normed_601_cast_fp16")]; + tensor normed_603_begin_0 = const()[name = string("normed_603_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_603_end_0 = const()[name = string("normed_603_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_603_end_mask_0 = const()[name = string("normed_603_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_603_cast_fp16 = slice_by_index(begin = normed_603_begin_0, end = normed_603_end_0, end_mask = normed_603_end_mask_0, x = normed_601_cast_fp16)[name = string("normed_603_cast_fp16")]; + tensor var_19011_to_fp16 = const()[name = string("op_19011_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871311424)))]; + tensor hidden_states_407_cast_fp16 = mul(x = normed_603_cast_fp16, y = var_19011_to_fp16)[name = string("hidden_states_407_cast_fp16")]; + tensor var_19022 = const()[name = string("op_19022"), val = tensor([0, 2, 1])]; + tensor var_19025_axes_0 = const()[name = string("op_19025_axes_0"), val = tensor([2])]; + tensor var_19023_cast_fp16 = transpose(perm = var_19022, x = hidden_states_407_cast_fp16)[name = string("transpose_8")]; + tensor var_19025_cast_fp16 = expand_dims(axes = var_19025_axes_0, x = var_19023_cast_fp16)[name = string("op_19025_cast_fp16")]; + string query_states_201_pad_type_0 = const()[name = string("query_states_201_pad_type_0"), val = string("valid")]; + tensor query_states_201_strides_0 = const()[name = string("query_states_201_strides_0"), val = tensor([1, 1])]; + tensor query_states_201_pad_0 = const()[name = string("query_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_201_dilations_0 = const()[name = string("query_states_201_dilations_0"), val = tensor([1, 1])]; + int32 query_states_201_groups_0 = const()[name = string("query_states_201_groups_0"), val = int32(1)]; + tensor query_states_201 = conv(dilations = query_states_201_dilations_0, groups = query_states_201_groups_0, pad = query_states_201_pad_0, pad_type = query_states_201_pad_type_0, strides = query_states_201_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_19025_cast_fp16)[name = string("query_states_201")]; + string key_states_251_pad_type_0 = const()[name = string("key_states_251_pad_type_0"), val = string("valid")]; + tensor key_states_251_strides_0 = const()[name = string("key_states_251_strides_0"), val = tensor([1, 1])]; + tensor key_states_251_pad_0 = const()[name = string("key_states_251_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_251_dilations_0 = const()[name = string("key_states_251_dilations_0"), val = tensor([1, 1])]; + int32 key_states_251_groups_0 = const()[name = string("key_states_251_groups_0"), val = int32(1)]; + tensor key_states_251 = conv(dilations = key_states_251_dilations_0, groups = key_states_251_groups_0, pad = key_states_251_pad_0, pad_type = key_states_251_pad_type_0, strides = key_states_251_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_19025_cast_fp16)[name = string("key_states_251")]; + string value_states_201_pad_type_0 = const()[name = string("value_states_201_pad_type_0"), val = string("valid")]; + tensor value_states_201_strides_0 = const()[name = string("value_states_201_strides_0"), val = tensor([1, 1])]; + tensor value_states_201_pad_0 = const()[name = string("value_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_201_dilations_0 = const()[name = string("value_states_201_dilations_0"), val = tensor([1, 1])]; + int32 value_states_201_groups_0 = const()[name = string("value_states_201_groups_0"), val = int32(1)]; + tensor value_states_201 = conv(dilations = value_states_201_dilations_0, groups = value_states_201_groups_0, pad = value_states_201_pad_0, pad_type = value_states_201_pad_type_0, strides = value_states_201_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_19025_cast_fp16)[name = string("value_states_201")]; + tensor var_19067 = const()[name = string("op_19067"), val = tensor([1, 4, 256, 64])]; + tensor var_19068 = reshape(shape = var_19067, x = query_states_201)[name = string("op_19068")]; + tensor var_19073 = const()[name = string("op_19073"), val = tensor([0, 1, 3, 2])]; + tensor var_19078 = const()[name = string("op_19078"), val = tensor([1, 1, 256, 64])]; + tensor var_19079 = reshape(shape = var_19078, x = key_states_251)[name = string("op_19079")]; + tensor var_19084 = const()[name = string("op_19084"), val = tensor([0, 1, 3, 2])]; + tensor var_19089 = const()[name = string("op_19089"), val = tensor([1, 1, 256, 64])]; + tensor var_19090 = reshape(shape = var_19089, x = value_states_201)[name = string("op_19090")]; + tensor var_19095 = const()[name = string("op_19095"), val = tensor([0, 1, 3, 2])]; + int32 var_19106 = const()[name = string("op_19106"), val = int32(-1)]; + fp16 const_1081_promoted = const()[name = string("const_1081_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_409 = transpose(perm = var_19073, x = var_19068)[name = string("transpose_7")]; + tensor var_19108 = mul(x = hidden_states_409, y = const_1081_promoted)[name = string("op_19108")]; + bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; + tensor input_505 = concat(axis = var_19106, interleave = input_505_interleave_0, values = (hidden_states_409, var_19108))[name = string("input_505")]; + tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; + fp16 var_19103_to_fp16 = const()[name = string("op_19103_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_19103_to_fp16, x = input_505)[name = string("normed_605_cast_fp16")]; + tensor normed_607_begin_0 = const()[name = string("normed_607_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_607_end_0 = const()[name = string("normed_607_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_607_end_mask_0 = const()[name = string("normed_607_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_607 = slice_by_index(begin = normed_607_begin_0, end = normed_607_end_0, end_mask = normed_607_end_mask_0, x = normed_605_cast_fp16)[name = string("normed_607")]; + tensor var_19122_to_fp16 = const()[name = string("op_19122_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871313792)))]; + tensor q_cast_fp16 = mul(x = normed_607, y = var_19122_to_fp16)[name = string("q_cast_fp16")]; + int32 var_19133 = const()[name = string("op_19133"), val = int32(-1)]; + fp16 const_1085_promoted = const()[name = string("const_1085_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_411 = transpose(perm = var_19084, x = var_19079)[name = string("transpose_6")]; + tensor var_19135 = mul(x = hidden_states_411, y = const_1085_promoted)[name = string("op_19135")]; + bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; + tensor input_507 = concat(axis = var_19133, interleave = input_507_interleave_0, values = (hidden_states_411, var_19135))[name = string("input_507")]; + tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; + fp16 var_19130_to_fp16 = const()[name = string("op_19130_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_19130_to_fp16, x = input_507)[name = string("normed_609_cast_fp16")]; + tensor normed_611_begin_0 = const()[name = string("normed_611_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_611_end_0 = const()[name = string("normed_611_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_611_end_mask_0 = const()[name = string("normed_611_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_611 = slice_by_index(begin = normed_611_begin_0, end = normed_611_end_0, end_mask = normed_611_end_mask_0, x = normed_609_cast_fp16)[name = string("normed_611")]; + tensor var_19149_to_fp16 = const()[name = string("op_19149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871314368)))]; + tensor k_cast_fp16 = mul(x = normed_611, y = var_19149_to_fp16)[name = string("k_cast_fp16")]; + tensor var_19163_cast_fp16 = mul(x = q_cast_fp16, y = cos_5)[name = string("op_19163_cast_fp16")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_cast_fp16)[name = string("x1_101_cast_fp16")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_cast_fp16)[name = string("x2_101_cast_fp16")]; + fp16 const_1091_promoted_to_fp16 = const()[name = string("const_1091_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19184_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_1091_promoted_to_fp16)[name = string("op_19184_cast_fp16")]; + int32 var_19186 = const()[name = string("op_19186"), val = int32(-1)]; + bool var_19187_interleave_0 = const()[name = string("op_19187_interleave_0"), val = bool(false)]; + tensor var_19187_cast_fp16 = concat(axis = var_19186, interleave = var_19187_interleave_0, values = (var_19184_cast_fp16, x1_101_cast_fp16))[name = string("op_19187_cast_fp16")]; + tensor var_19188_cast_fp16 = mul(x = var_19187_cast_fp16, y = sin_5)[name = string("op_19188_cast_fp16")]; + tensor query_states_203_cast_fp16 = add(x = var_19163_cast_fp16, y = var_19188_cast_fp16)[name = string("query_states_203_cast_fp16")]; + tensor var_19191_cast_fp16 = mul(x = k_cast_fp16, y = cos_5)[name = string("op_19191_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; + fp16 const_1094_promoted_to_fp16 = const()[name = string("const_1094_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19212_cast_fp16 = mul(x = x2_cast_fp16, y = const_1094_promoted_to_fp16)[name = string("op_19212_cast_fp16")]; + int32 var_19214 = const()[name = string("op_19214"), val = int32(-1)]; + bool var_19215_interleave_0 = const()[name = string("op_19215_interleave_0"), val = bool(false)]; + tensor var_19215_cast_fp16 = concat(axis = var_19214, interleave = var_19215_interleave_0, values = (var_19212_cast_fp16, x1_cast_fp16))[name = string("op_19215_cast_fp16")]; + tensor var_19216_cast_fp16 = mul(x = var_19215_cast_fp16, y = sin_5)[name = string("op_19216_cast_fp16")]; + tensor key_states_253_cast_fp16 = add(x = var_19191_cast_fp16, y = var_19216_cast_fp16)[name = string("key_states_253_cast_fp16")]; + tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([21])]; + tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; + tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; + tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([22])]; + int32 concat_452_axis_0 = const()[name = string("concat_452_axis_0"), val = int32(0)]; + bool concat_452_interleave_0 = const()[name = string("concat_452_interleave_0"), val = bool(false)]; + tensor concat_452 = concat(axis = concat_452_axis_0, interleave = concat_452_interleave_0, values = (expand_dims_300, expand_dims_301, current_pos, expand_dims_303))[name = string("concat_452")]; + tensor concat_453_values1_0 = const()[name = string("concat_453_values1_0"), val = tensor([0])]; + tensor concat_453_values3_0 = const()[name = string("concat_453_values3_0"), val = tensor([0])]; + int32 concat_453_axis_0 = const()[name = string("concat_453_axis_0"), val = int32(0)]; + bool concat_453_interleave_0 = const()[name = string("concat_453_interleave_0"), val = bool(false)]; + tensor concat_453 = concat(axis = concat_453_axis_0, interleave = concat_453_interleave_0, values = (expand_dims_304, concat_453_values1_0, end_pos_1, concat_453_values3_0))[name = string("concat_453")]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_452, begin_mask = model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0, end = concat_453, end_mask = model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_43_stride_0, update = key_states_253_cast_fp16, x = coreml_update_state_101)[name = string("model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_154_write_state")]; + tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_154")]; + tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([43])]; + tensor expand_dims_307 = const()[name = string("expand_dims_307"), val = tensor([0])]; + tensor expand_dims_309 = const()[name = string("expand_dims_309"), val = tensor([0])]; + tensor expand_dims_310 = const()[name = string("expand_dims_310"), val = tensor([44])]; + int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; + bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; + tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (expand_dims_306, expand_dims_307, current_pos, expand_dims_309))[name = string("concat_456")]; + tensor concat_457_values1_0 = const()[name = string("concat_457_values1_0"), val = tensor([0])]; + tensor concat_457_values3_0 = const()[name = string("concat_457_values3_0"), val = tensor([0])]; + int32 concat_457_axis_0 = const()[name = string("concat_457_axis_0"), val = int32(0)]; + bool concat_457_interleave_0 = const()[name = string("concat_457_interleave_0"), val = bool(false)]; + tensor concat_457 = concat(axis = concat_457_axis_0, interleave = concat_457_interleave_0, values = (expand_dims_310, concat_457_values1_0, end_pos_1, concat_457_values3_0))[name = string("concat_457")]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_203 = transpose(perm = var_19095, x = var_19090)[name = string("transpose_5")]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_456, begin_mask = model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0, end = concat_457, end_mask = model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_44_stride_0, update = value_states_203, x = coreml_update_state_102)[name = string("model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_155_write_state")]; + tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_155")]; + tensor var_19315_begin_0 = const()[name = string("op_19315_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_19315_end_0 = const()[name = string("op_19315_end_0"), val = tensor([22, 1, 512, 256])]; + tensor var_19315_end_mask_0 = const()[name = string("op_19315_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_19315_cast_fp16 = slice_by_index(begin = var_19315_begin_0, end = var_19315_end_0, end_mask = var_19315_end_mask_0, x = coreml_update_state_103)[name = string("op_19315_cast_fp16")]; + tensor var_19322_begin_0 = const()[name = string("op_19322_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor var_19322_end_0 = const()[name = string("op_19322_end_0"), val = tensor([1, 1, 512, 256])]; + tensor var_19322_end_mask_0 = const()[name = string("op_19322_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_19322_cast_fp16 = slice_by_index(begin = var_19322_begin_0, end = var_19322_end_0, end_mask = var_19322_end_mask_0, x = coreml_update_state_103)[name = string("op_19322_cast_fp16")]; + tensor var_19361 = const()[name = string("op_19361"), val = tensor([1, 4, 1, 1])]; + tensor x_405_cast_fp16 = tile(reps = var_19361, x = var_19315_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_19381 = const()[name = string("op_19381"), val = tensor([1, 4, 1, 1])]; + tensor x_411_cast_fp16 = tile(reps = var_19381, x = var_19322_cast_fp16)[name = string("x_411_cast_fp16")]; + bool var_19408_transpose_x_0 = const()[name = string("op_19408_transpose_x_0"), val = bool(false)]; + bool var_19408_transpose_y_0 = const()[name = string("op_19408_transpose_y_0"), val = bool(true)]; + tensor var_19408 = matmul(transpose_x = var_19408_transpose_x_0, transpose_y = var_19408_transpose_y_0, x = query_states_203_cast_fp16, y = x_405_cast_fp16)[name = string("op_19408")]; + fp16 var_19409_to_fp16 = const()[name = string("op_19409_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_101_cast_fp16 = mul(x = var_19408, y = var_19409_to_fp16)[name = string("attn_weights_101_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_101_cast_fp16, y = mask_slice_1)[name = string("attn_weights_cast_fp16")]; + int32 var_19444 = const()[name = string("op_19444"), val = int32(-1)]; + tensor var_19446_cast_fp16 = softmax(axis = var_19444, x = attn_weights_cast_fp16)[name = string("op_19446_cast_fp16")]; + tensor concat_462 = const()[name = string("concat_462"), val = tensor([4, 64, 512])]; + tensor reshape_75_cast_fp16 = reshape(shape = concat_462, x = var_19446_cast_fp16)[name = string("reshape_75_cast_fp16")]; + tensor concat_463 = const()[name = string("concat_463"), val = tensor([4, 512, 256])]; + tensor reshape_76_cast_fp16 = reshape(shape = concat_463, x = x_411_cast_fp16)[name = string("reshape_76_cast_fp16")]; + bool matmul_25_transpose_x_0 = const()[name = string("matmul_25_transpose_x_0"), val = bool(false)]; + bool matmul_25_transpose_y_0 = const()[name = string("matmul_25_transpose_y_0"), val = bool(false)]; + tensor matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_0, transpose_y = matmul_25_transpose_y_0, x = reshape_75_cast_fp16, y = reshape_76_cast_fp16)[name = string("matmul_25_cast_fp16")]; + tensor concat_467 = const()[name = string("concat_467"), val = tensor([1, 4, 64, 256])]; + tensor reshape_77_cast_fp16 = reshape(shape = concat_467, x = matmul_25_cast_fp16)[name = string("reshape_77_cast_fp16")]; + tensor var_19458_perm_0 = const()[name = string("op_19458_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_19477 = const()[name = string("op_19477"), val = tensor([1, 64, 1024])]; + tensor var_19458_cast_fp16 = transpose(perm = var_19458_perm_0, x = reshape_77_cast_fp16)[name = string("transpose_4")]; + tensor attn_output_255_cast_fp16 = reshape(shape = var_19477, x = var_19458_cast_fp16)[name = string("attn_output_255_cast_fp16")]; + tensor var_19482 = const()[name = string("op_19482"), val = tensor([0, 2, 1])]; + string var_19498_pad_type_0 = const()[name = string("op_19498_pad_type_0"), val = string("valid")]; + int32 var_19498_groups_0 = const()[name = string("op_19498_groups_0"), val = int32(1)]; + tensor var_19498_strides_0 = const()[name = string("op_19498_strides_0"), val = tensor([1])]; + tensor var_19498_pad_0 = const()[name = string("op_19498_pad_0"), val = tensor([0, 0])]; + tensor var_19498_dilations_0 = const()[name = string("op_19498_dilations_0"), val = tensor([1])]; + tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871314944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872199744))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_19483_cast_fp16 = transpose(perm = var_19482, x = attn_output_255_cast_fp16)[name = string("transpose_3")]; + tensor var_19498_cast_fp16 = conv(dilations = var_19498_dilations_0, groups = var_19498_groups_0, pad = var_19498_pad_0, pad_type = var_19498_pad_type_0, strides = var_19498_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_19483_cast_fp16)[name = string("op_19498_cast_fp16")]; + tensor var_19502 = const()[name = string("op_19502"), val = tensor([0, 2, 1])]; + int32 var_19513 = const()[name = string("op_19513"), val = int32(-1)]; + fp16 const_1106_promoted_to_fp16 = const()[name = string("const_1106_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_413_cast_fp16 = transpose(perm = var_19502, x = var_19498_cast_fp16)[name = string("transpose_2")]; + tensor var_19515_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = const_1106_promoted_to_fp16)[name = string("op_19515_cast_fp16")]; + bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; + tensor input_511_cast_fp16 = concat(axis = var_19513, interleave = input_511_interleave_0, values = (hidden_states_413_cast_fp16, var_19515_cast_fp16))[name = string("input_511_cast_fp16")]; + tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; + fp16 var_19510_to_fp16 = const()[name = string("op_19510_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_19510_to_fp16, x = input_511_cast_fp16)[name = string("normed_613_cast_fp16")]; + tensor normed_615_begin_0 = const()[name = string("normed_615_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_615_end_0 = const()[name = string("normed_615_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_615_end_mask_0 = const()[name = string("normed_615_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_615_cast_fp16 = slice_by_index(begin = normed_615_begin_0, end = normed_615_end_0, end_mask = normed_615_end_mask_0, x = normed_613_cast_fp16)[name = string("normed_615_cast_fp16")]; + tensor var_19529_to_fp16 = const()[name = string("op_19529_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872236672)))]; + tensor attn_output_cast_fp16 = mul(x = normed_615_cast_fp16, y = var_19529_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor hidden_states_415_cast_fp16 = add(x = hidden_states_405_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_415_cast_fp16")]; + int32 var_19542 = const()[name = string("op_19542"), val = int32(-1)]; + fp16 const_1110_promoted_to_fp16 = const()[name = string("const_1110_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19544_cast_fp16 = mul(x = hidden_states_415_cast_fp16, y = const_1110_promoted_to_fp16)[name = string("op_19544_cast_fp16")]; + bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; + tensor input_513_cast_fp16 = concat(axis = var_19542, interleave = input_513_interleave_0, values = (hidden_states_415_cast_fp16, var_19544_cast_fp16))[name = string("input_513_cast_fp16")]; + tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; + fp16 var_19539_to_fp16 = const()[name = string("op_19539_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_19539_to_fp16, x = input_513_cast_fp16)[name = string("normed_617_cast_fp16")]; + tensor normed_619_begin_0 = const()[name = string("normed_619_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_619_end_0 = const()[name = string("normed_619_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_619_end_mask_0 = const()[name = string("normed_619_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_619_cast_fp16 = slice_by_index(begin = normed_619_begin_0, end = normed_619_end_0, end_mask = normed_619_end_mask_0, x = normed_617_cast_fp16)[name = string("normed_619_cast_fp16")]; + tensor var_19558_to_fp16 = const()[name = string("op_19558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872239040)))]; + tensor x_413_cast_fp16 = mul(x = normed_619_cast_fp16, y = var_19558_to_fp16)[name = string("x_413_cast_fp16")]; + tensor var_19570 = const()[name = string("op_19570"), val = tensor([0, 2, 1])]; + tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; + tensor var_19571_cast_fp16 = transpose(perm = var_19570, x = x_413_cast_fp16)[name = string("transpose_1")]; + tensor input_515_cast_fp16 = expand_dims(axes = input_515_axes_0, x = var_19571_cast_fp16)[name = string("input_515_cast_fp16")]; + string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; + tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; + tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; + int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872241408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878213440))))[name = string("model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_cast_fp16 = conv(dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("x_cast_fp16")]; + string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; + tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; + tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; + int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878434688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884406720))))[name = string("model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_cast_fp16 = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("b_cast_fp16")]; + string var_19596_mode_0 = const()[name = string("op_19596_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_19596_cast_fp16 = gelu(mode = var_19596_mode_0, x = x_cast_fp16)[name = string("op_19596_cast_fp16")]; + tensor input_517_cast_fp16 = mul(x = var_19596_cast_fp16, y = b_cast_fp16)[name = string("input_517_cast_fp16")]; + string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; + tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; + tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; + int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884627968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890600000))))[name = string("model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_cast_fp16 = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("e_cast_fp16")]; + tensor var_19604_axes_0 = const()[name = string("op_19604_axes_0"), val = tensor([2])]; + tensor var_19604_cast_fp16 = squeeze(axes = var_19604_axes_0, x = e_cast_fp16)[name = string("op_19604_cast_fp16")]; + tensor var_19605 = const()[name = string("op_19605"), val = tensor([0, 2, 1])]; + int32 var_19616 = const()[name = string("op_19616"), val = int32(-1)]; + fp16 const_1114_promoted_to_fp16 = const()[name = string("const_1114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_417_cast_fp16 = transpose(perm = var_19605, x = var_19604_cast_fp16)[name = string("transpose_0")]; + tensor var_19618_cast_fp16 = mul(x = hidden_states_417_cast_fp16, y = const_1114_promoted_to_fp16)[name = string("op_19618_cast_fp16")]; + bool input_519_interleave_0 = const()[name = string("input_519_interleave_0"), val = bool(false)]; + tensor input_519_cast_fp16 = concat(axis = var_19616, interleave = input_519_interleave_0, values = (hidden_states_417_cast_fp16, var_19618_cast_fp16))[name = string("input_519_cast_fp16")]; + tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; + fp16 var_19613_to_fp16 = const()[name = string("op_19613_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_19613_to_fp16, x = input_519_cast_fp16)[name = string("normed_621_cast_fp16")]; + tensor normed_623_begin_0 = const()[name = string("normed_623_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_623_end_0 = const()[name = string("normed_623_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_623_end_mask_0 = const()[name = string("normed_623_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_623_cast_fp16 = slice_by_index(begin = normed_623_begin_0, end = normed_623_end_0, end_mask = normed_623_end_mask_0, x = normed_621_cast_fp16)[name = string("normed_623_cast_fp16")]; + tensor var_19632_to_fp16 = const()[name = string("op_19632_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890636928)))]; + tensor hidden_states_419_cast_fp16 = mul(x = normed_623_cast_fp16, y = var_19632_to_fp16)[name = string("hidden_states_419_cast_fp16")]; + tensor hidden_states_421_cast_fp16 = add(x = hidden_states_415_cast_fp16, y = hidden_states_419_cast_fp16)[name = string("hidden_states_421_cast_fp16")]; + int32 var_19645 = const()[name = string("op_19645"), val = int32(-1)]; + fp16 const_1118_promoted_to_fp16 = const()[name = string("const_1118_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19647_cast_fp16 = mul(x = hidden_states_421_cast_fp16, y = const_1118_promoted_to_fp16)[name = string("op_19647_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_19645, interleave = input_interleave_0, values = (hidden_states_421_cast_fp16, var_19647_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_625_axes_0 = const()[name = string("normed_625_axes_0"), val = tensor([-1])]; + fp16 var_19642_to_fp16 = const()[name = string("op_19642_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_625_cast_fp16 = layer_norm(axes = normed_625_axes_0, epsilon = var_19642_to_fp16, x = input_cast_fp16)[name = string("normed_625_cast_fp16")]; + tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_625_cast_fp16)[name = string("normed_cast_fp16")]; + tensor var_19661_to_fp16 = const()[name = string("op_19661_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890639296)))]; + tensor hidden_states_cast_fp16 = mul(x = normed_cast_fp16, y = var_19661_to_fp16)[name = string("hidden_states_cast_fp16")]; + tensor var_19672_begin_0 = const()[name = string("op_19672_begin_0"), val = tensor([0, 0, 0])]; + tensor var_19672_end_0 = const()[name = string("op_19672_end_0"), val = tensor([1, 1, 1152])]; + tensor var_19672_end_mask_0 = const()[name = string("op_19672_end_mask_0"), val = tensor([true, false, true])]; + tensor output_hidden_states = slice_by_index(begin = var_19672_begin_0, end = var_19672_end_0, end_mask = var_19672_end_mask_0, x = hidden_states_cast_fp16)[name = string("op_19672_cast_fp16")]; + } -> (output_hidden_states); + func prefill_rotate(tensor causal_mask, tensor current_pos, tensor input_ids, state> model_model_kv_cache_global, state> model_model_kv_cache_local, tensor position_ids) { + tensor model_model_embed_tokens_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301990016))))[name = string("model_model_embed_tokens_weight_palettized")]; + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335544512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336429312))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336462144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336683392))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336691648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336912896))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336921152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337805952))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337838784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338060032))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338068288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338289536))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338297792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339182592))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339215424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339436672))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339444928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339666176))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339674432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340559232))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340592064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340813312))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340821568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341042816))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341051072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341935872))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341968704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342189952))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342198208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342419456))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342427712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343312512))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343345344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343566592))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343574848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343796096))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343804352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344689152))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344721984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344943232))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344951488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345172736))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345180992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346065792))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346098624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346319872))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346328128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346549376))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346557632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347442432))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347475264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347696512))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347926016))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347934272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348819072))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348851904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349073152))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349081408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349302656))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349310912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350195712))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350228544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350449792))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350458048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350679296))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350687552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351572352))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351605184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351826432))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351834688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352055936))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352064192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352948992))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352981824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353203072))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353211328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353432576))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353440832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354325632))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354358464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354579712))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354587968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354809216))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354817472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355702272))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355735104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355956352))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355964608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356185856))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356194112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357078912))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357111744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357332992))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357341248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357562496))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357570752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358455552))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358488384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358709632))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358717888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358939136))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358947392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359832192))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359865024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360086272))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360094528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360315776))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360324032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361208832))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361241664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361462912))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361471168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361692416))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361700672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362585472))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362618304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362839552))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362847808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363069056))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363077312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363962112))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363994944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364216192))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364224448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364445696))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364453952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365338752))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365371584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365592832))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365601088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365822336))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365830592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366715392))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366748224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366969472))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366977728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367198976))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367207232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368092032))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368124864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368346112))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368354368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368575616))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368583872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369468672))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369501504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369722752))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369731008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369952256))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369960512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370845312))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370878144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371099392))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371107648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371328896))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; + int32 hidden_states_1_batch_dims_0 = const()[name = string("hidden_states_1_batch_dims_0"), val = int32(0)]; + bool hidden_states_1_validate_indices_0 = const()[name = string("hidden_states_1_validate_indices_0"), val = bool(false)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_208 = const()[name = string("slice_by_index_208"), val = int32(262144)]; + tensor add_0 = add(x = input_ids, y = slice_by_index_208)[name = string("add_0")]; + tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; + tensor greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(262144)]; + tensor add_0_1 = add(x = select_0, y = slice_by_index_0)[name = string("add_0_1")]; + tensor select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; + int32 hidden_states_1_axis_0 = const()[name = string("hidden_states_1_axis_0"), val = int32(0)]; + tensor hidden_states_1 = gather(axis = hidden_states_1_axis_0, batch_dims = hidden_states_1_batch_dims_0, indices = select_0_1, validate_indices = hidden_states_1_validate_indices_0, x = model_model_embed_tokens_weight_palettized)[name = string("hidden_states_1")]; + fp16 var_1607_to_fp16 = const()[name = string("op_1607_to_fp16"), val = fp16(0x1.0f8p+5)]; + tensor hidden_states_3_cast_fp16 = mul(x = hidden_states_1, y = var_1607_to_fp16)[name = string("hidden_states_3_cast_fp16")]; + int32 var_1627_axis_0 = const()[name = string("op_1627_axis_0"), val = int32(1)]; + int32 var_1627_batch_dims_0 = const()[name = string("op_1627_batch_dims_0"), val = int32(0)]; + bool var_1627_validate_indices_0 = const()[name = string("op_1627_validate_indices_0"), val = bool(false)]; + tensor var_1619_to_fp16 = const()[name = string("op_1619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375531520)))]; + string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")]; + tensor position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_0")]; + tensor var_1627_cast_fp16_cast_uint16 = gather(axis = var_1627_axis_0, batch_dims = var_1627_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_1627_validate_indices_0, x = var_1619_to_fp16)[name = string("op_1627_cast_fp16_cast_uint16")]; + tensor var_1631 = const()[name = string("op_1631"), val = tensor([1, 64, 1, 256])]; + tensor cos_1_cast_fp16 = reshape(shape = var_1631, x = var_1627_cast_fp16_cast_uint16)[name = string("cos_1_cast_fp16")]; + int32 var_1641_axis_0 = const()[name = string("op_1641_axis_0"), val = int32(1)]; + int32 var_1641_batch_dims_0 = const()[name = string("op_1641_batch_dims_0"), val = int32(0)]; + bool var_1641_validate_indices_0 = const()[name = string("op_1641_validate_indices_0"), val = bool(false)]; + tensor var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371337152)))]; + tensor var_1641_cast_fp16_cast_uint16 = gather(axis = var_1641_axis_0, batch_dims = var_1641_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_1641_validate_indices_0, x = var_1633_to_fp16)[name = string("op_1641_cast_fp16_cast_uint16")]; + tensor var_1645 = const()[name = string("op_1645"), val = tensor([1, 64, 1, 256])]; + tensor sin_1_cast_fp16 = reshape(shape = var_1645, x = var_1641_cast_fp16_cast_uint16)[name = string("sin_1_cast_fp16")]; + int32 var_1666 = const()[name = string("op_1666"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_1668 = mul(x = hidden_states_3_cast_fp16, y = const_1_promoted)[name = string("op_1668")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1 = concat(axis = var_1666, interleave = input_1_interleave_0, values = (hidden_states_3_cast_fp16, var_1668))[name = string("input_1")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1663_to_fp16, x = input_1)[name = string("normed_1_cast_fp16")]; + tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_3 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3")]; + tensor var_1682_to_fp16 = const()[name = string("op_1682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379725888)))]; + tensor hidden_states_7_cast_fp16 = mul(x = normed_3, y = var_1682_to_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_1693 = const()[name = string("op_1693"), val = tensor([0, 2, 1])]; + tensor var_1696_axes_0 = const()[name = string("op_1696_axes_0"), val = tensor([2])]; + tensor var_1694_cast_fp16 = transpose(perm = var_1693, x = hidden_states_7_cast_fp16)[name = string("transpose_237")]; + tensor var_1696_cast_fp16 = expand_dims(axes = var_1696_axes_0, x = var_1694_cast_fp16)[name = string("op_1696_cast_fp16")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1696_cast_fp16)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1696_cast_fp16)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1696_cast_fp16)[name = string("value_states_1")]; + tensor var_1738 = const()[name = string("op_1738"), val = tensor([1, 4, 256, 64])]; + tensor var_1739 = reshape(shape = var_1738, x = query_states_1)[name = string("op_1739")]; + tensor var_1744 = const()[name = string("op_1744"), val = tensor([0, 1, 3, 2])]; + tensor var_1749 = const()[name = string("op_1749"), val = tensor([1, 1, 256, 64])]; + tensor var_1750 = reshape(shape = var_1749, x = key_states_1)[name = string("op_1750")]; + tensor var_1755 = const()[name = string("op_1755"), val = tensor([0, 1, 3, 2])]; + tensor var_1760 = const()[name = string("op_1760"), val = tensor([1, 1, 256, 64])]; + tensor var_1761 = reshape(shape = var_1760, x = value_states_1)[name = string("op_1761")]; + tensor var_1766 = const()[name = string("op_1766"), val = tensor([0, 1, 3, 2])]; + int32 var_1777 = const()[name = string("op_1777"), val = int32(-1)]; + fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_9 = transpose(perm = var_1744, x = var_1739)[name = string("transpose_236")]; + tensor var_1779 = mul(x = hidden_states_9, y = const_6_promoted)[name = string("op_1779")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_1777, interleave = input_5_interleave_0, values = (hidden_states_9, var_1779))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_1774_to_fp16 = const()[name = string("op_1774_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1774_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; + tensor var_1793_to_fp16 = const()[name = string("op_1793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379728256)))]; + tensor q_1_cast_fp16 = mul(x = normed_7, y = var_1793_to_fp16)[name = string("q_1_cast_fp16")]; + int32 var_1804 = const()[name = string("op_1804"), val = int32(-1)]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_11 = transpose(perm = var_1755, x = var_1750)[name = string("transpose_235")]; + tensor var_1806 = mul(x = hidden_states_11, y = const_10_promoted)[name = string("op_1806")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_1804, interleave = input_7_interleave_0, values = (hidden_states_11, var_1806))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_1801_to_fp16 = const()[name = string("op_1801_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1801_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; + tensor var_1820_to_fp16 = const()[name = string("op_1820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379728832)))]; + tensor k_1_cast_fp16 = mul(x = normed_11, y = var_1820_to_fp16)[name = string("k_1_cast_fp16")]; + tensor var_1826 = const()[name = string("op_1826"), val = tensor([0, 2, 1, 3])]; + tensor var_1832 = const()[name = string("op_1832"), val = tensor([0, 2, 1, 3])]; + tensor cos_5 = transpose(perm = var_1826, x = cos_1_cast_fp16)[name = string("transpose_234")]; + tensor var_1834_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_5)[name = string("op_1834_cast_fp16")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1855_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1855_cast_fp16")]; + int32 var_1857 = const()[name = string("op_1857"), val = int32(-1)]; + bool var_1858_interleave_0 = const()[name = string("op_1858_interleave_0"), val = bool(false)]; + tensor var_1858_cast_fp16 = concat(axis = var_1857, interleave = var_1858_interleave_0, values = (var_1855_cast_fp16, x1_1_cast_fp16))[name = string("op_1858_cast_fp16")]; + tensor sin_5 = transpose(perm = var_1832, x = sin_1_cast_fp16)[name = string("transpose_233")]; + tensor var_1859_cast_fp16 = mul(x = var_1858_cast_fp16, y = sin_5)[name = string("op_1859_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_1834_cast_fp16, y = var_1859_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor var_1862_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_5)[name = string("op_1862_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1883_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1883_cast_fp16")]; + int32 var_1885 = const()[name = string("op_1885"), val = int32(-1)]; + bool var_1886_interleave_0 = const()[name = string("op_1886_interleave_0"), val = bool(false)]; + tensor var_1886_cast_fp16 = concat(axis = var_1885, interleave = var_1886_interleave_0, values = (var_1883_cast_fp16, x1_3_cast_fp16))[name = string("op_1886_cast_fp16")]; + tensor var_1887_cast_fp16 = mul(x = var_1886_cast_fp16, y = sin_5)[name = string("op_1887_cast_fp16")]; + tensor key_states_3_cast_fp16 = add(x = var_1862_cast_fp16, y = var_1887_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_local)[name = string("read_state_0")]; + tensor key_slice_1_begin_0 = const()[name = string("key_slice_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor key_slice_1_end_0 = const()[name = string("key_slice_1_end_0"), val = tensor([1, 1, 512, 256])]; + tensor key_slice_1_end_mask_0 = const()[name = string("key_slice_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_1_cast_fp16 = slice_by_index(begin = key_slice_1_begin_0, end = key_slice_1_end_0, end_mask = key_slice_1_end_mask_0, x = read_state_0)[name = string("key_slice_1_cast_fp16")]; + tensor var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1924_end_0 = const()[name = string("op_1924_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = var_1924_end_0, end_mask = var_1924_end_mask_0, x = key_slice_1_cast_fp16)[name = string("op_1924_cast_fp16")]; + int32 var_1951 = const()[name = string("op_1951"), val = int32(2)]; + bool shifted_key_1_interleave_0 = const()[name = string("shifted_key_1_interleave_0"), val = bool(false)]; + tensor shifted_key_1_cast_fp16 = concat(axis = var_1951, interleave = shifted_key_1_interleave_0, values = (var_1924_cast_fp16, key_states_3_cast_fp16))[name = string("shifted_key_1_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_1_stride_0, update = shifted_key_1_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_156_write_state")]; + tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_156")]; + tensor value_slice_1_begin_0 = const()[name = string("value_slice_1_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor value_slice_1_end_0 = const()[name = string("value_slice_1_end_0"), val = tensor([23, 1, 512, 256])]; + tensor value_slice_1_end_mask_0 = const()[name = string("value_slice_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_1_cast_fp16 = slice_by_index(begin = value_slice_1_begin_0, end = value_slice_1_end_0, end_mask = value_slice_1_end_mask_0, x = coreml_update_state_52)[name = string("value_slice_1_cast_fp16")]; + tensor var_1994_begin_0 = const()[name = string("op_1994_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1994_end_0 = const()[name = string("op_1994_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_1994_end_mask_0 = const()[name = string("op_1994_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1994_cast_fp16 = slice_by_index(begin = var_1994_begin_0, end = var_1994_end_0, end_mask = var_1994_end_mask_0, x = value_slice_1_cast_fp16)[name = string("op_1994_cast_fp16")]; + int32 var_2021 = const()[name = string("op_2021"), val = int32(2)]; + bool shifted_value_1_interleave_0 = const()[name = string("shifted_value_1_interleave_0"), val = bool(false)]; + tensor value_states_3 = transpose(perm = var_1766, x = var_1761)[name = string("transpose_232")]; + tensor shifted_value_1_cast_fp16 = concat(axis = var_2021, interleave = shifted_value_1_interleave_0, values = (var_1994_cast_fp16, value_states_3))[name = string("shifted_value_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([22, 0, 0, 0])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([23, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_2_stride_0, update = shifted_value_1_cast_fp16, x = coreml_update_state_52)[name = string("model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_157_write_state")]; + tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_157")]; + tensor var_2049_begin_0 = const()[name = string("op_2049_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2049_end_0 = const()[name = string("op_2049_end_0"), val = tensor([1, 1, 512, 256])]; + tensor var_2049_end_mask_0 = const()[name = string("op_2049_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2049_cast_fp16 = slice_by_index(begin = var_2049_begin_0, end = var_2049_end_0, end_mask = var_2049_end_mask_0, x = coreml_update_state_53)[name = string("op_2049_cast_fp16")]; + tensor var_2056_begin_0 = const()[name = string("op_2056_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_2056_end_0 = const()[name = string("op_2056_end_0"), val = tensor([23, 1, 512, 256])]; + tensor var_2056_end_mask_0 = const()[name = string("op_2056_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2056_cast_fp16 = slice_by_index(begin = var_2056_begin_0, end = var_2056_end_0, end_mask = var_2056_end_mask_0, x = coreml_update_state_53)[name = string("op_2056_cast_fp16")]; + tensor var_2095 = const()[name = string("op_2095"), val = tensor([1, 4, 1, 1])]; + tensor x_5_cast_fp16 = tile(reps = var_2095, x = var_2049_cast_fp16)[name = string("x_5_cast_fp16")]; + tensor var_2115 = const()[name = string("op_2115"), val = tensor([1, 4, 1, 1])]; + tensor x_11_cast_fp16 = tile(reps = var_2115, x = var_2056_cast_fp16)[name = string("x_11_cast_fp16")]; + bool var_2142_transpose_x_0 = const()[name = string("op_2142_transpose_x_0"), val = bool(false)]; + bool var_2142_transpose_y_0 = const()[name = string("op_2142_transpose_y_0"), val = bool(true)]; + tensor var_2142 = matmul(transpose_x = var_2142_transpose_x_0, transpose_y = var_2142_transpose_y_0, x = query_states_3_cast_fp16, y = x_5_cast_fp16)[name = string("op_2142")]; + fp16 var_2143_to_fp16 = const()[name = string("op_2143_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_2142, y = var_2143_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor mask_slice_1_begin_0 = const()[name = string("mask_slice_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor mask_slice_1_end_0 = const()[name = string("mask_slice_1_end_0"), val = tensor([1, 1, 64, 512])]; + tensor mask_slice_1_end_mask_0 = const()[name = string("mask_slice_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor mask_slice_1 = slice_by_index(begin = mask_slice_1_begin_0, end = mask_slice_1_end_0, end_mask = mask_slice_1_end_mask_0, x = causal_mask)[name = string("mask_slice_1")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask_slice_1)[name = string("attn_weights_3_cast_fp16")]; + int32 var_2178 = const()[name = string("op_2178"), val = int32(-1)]; + tensor var_2180_cast_fp16 = softmax(axis = var_2178, x = attn_weights_3_cast_fp16)[name = string("op_2180_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 64, 512])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_8, x = var_2180_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([4, 512, 256])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_9, x = x_11_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([1, 4, 64, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_13, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_2192_perm_0 = const()[name = string("op_2192_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2211 = const()[name = string("op_2211"), val = tensor([1, 64, 1024])]; + tensor var_2192_cast_fp16 = transpose(perm = var_2192_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_231")]; + tensor attn_output_5_cast_fp16 = reshape(shape = var_2211, x = var_2192_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_2216 = const()[name = string("op_2216"), val = tensor([0, 2, 1])]; + string var_2232_pad_type_0 = const()[name = string("op_2232_pad_type_0"), val = string("valid")]; + int32 var_2232_groups_0 = const()[name = string("op_2232_groups_0"), val = int32(1)]; + tensor var_2232_strides_0 = const()[name = string("op_2232_strides_0"), val = tensor([1])]; + tensor var_2232_pad_0 = const()[name = string("op_2232_pad_0"), val = tensor([0, 0])]; + tensor var_2232_dilations_0 = const()[name = string("op_2232_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379729408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380614208))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2217_cast_fp16 = transpose(perm = var_2216, x = attn_output_5_cast_fp16)[name = string("transpose_230")]; + tensor var_2232_cast_fp16 = conv(dilations = var_2232_dilations_0, groups = var_2232_groups_0, pad = var_2232_pad_0, pad_type = var_2232_pad_type_0, strides = var_2232_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_2217_cast_fp16)[name = string("op_2232_cast_fp16")]; + tensor var_2236 = const()[name = string("op_2236"), val = tensor([0, 2, 1])]; + int32 var_2247 = const()[name = string("op_2247"), val = int32(-1)]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_13_cast_fp16 = transpose(perm = var_2236, x = var_2232_cast_fp16)[name = string("transpose_229")]; + tensor var_2249_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2249_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_2247, interleave = input_11_interleave_0, values = (hidden_states_13_cast_fp16, var_2249_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_2244_to_fp16 = const()[name = string("op_2244_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2244_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; + tensor var_2263_to_fp16 = const()[name = string("op_2263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380651136)))]; + tensor attn_output_9_cast_fp16 = mul(x = normed_15_cast_fp16, y = var_2263_to_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = attn_output_9_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + int32 var_2276 = const()[name = string("op_2276"), val = int32(-1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2278_cast_fp16 = mul(x = hidden_states_15_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2278_cast_fp16")]; + bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; + tensor input_13_cast_fp16 = concat(axis = var_2276, interleave = input_13_interleave_0, values = (hidden_states_15_cast_fp16, var_2278_cast_fp16))[name = string("input_13_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2273_to_fp16, x = input_13_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; + tensor var_2292_to_fp16 = const()[name = string("op_2292_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380653504)))]; + tensor x_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = var_2292_to_fp16)[name = string("x_13_cast_fp16")]; + tensor var_2304 = const()[name = string("op_2304"), val = tensor([0, 2, 1])]; + tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; + tensor var_2305_cast_fp16 = transpose(perm = var_2304, x = x_13_cast_fp16)[name = string("transpose_228")]; + tensor input_15_cast_fp16 = expand_dims(axes = input_15_axes_0, x = var_2305_cast_fp16)[name = string("input_15_cast_fp16")]; + string x_15_pad_type_0 = const()[name = string("x_15_pad_type_0"), val = string("valid")]; + tensor x_15_strides_0 = const()[name = string("x_15_strides_0"), val = tensor([1, 1])]; + tensor x_15_pad_0 = const()[name = string("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_15_dilations_0 = const()[name = string("x_15_dilations_0"), val = tensor([1, 1])]; + int32 x_15_groups_0 = const()[name = string("x_15_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380655872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386627904))))[name = string("model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_15_cast_fp16 = conv(dilations = x_15_dilations_0, groups = x_15_groups_0, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = x_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("x_15_cast_fp16")]; + string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; + tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; + tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; + int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386849152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392821184))))[name = string("model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_1_cast_fp16 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("b_1_cast_fp16")]; + string var_2330_mode_0 = const()[name = string("op_2330_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_2330_cast_fp16 = gelu(mode = var_2330_mode_0, x = x_15_cast_fp16)[name = string("op_2330_cast_fp16")]; + tensor input_17_cast_fp16 = mul(x = var_2330_cast_fp16, y = b_1_cast_fp16)[name = string("input_17_cast_fp16")]; + string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; + tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; + tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; + int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; + tensor model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393042432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399014464))))[name = string("model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_1_cast_fp16 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_17_cast_fp16)[name = string("e_1_cast_fp16")]; + tensor var_2338_axes_0 = const()[name = string("op_2338_axes_0"), val = tensor([2])]; + tensor var_2338_cast_fp16 = squeeze(axes = var_2338_axes_0, x = e_1_cast_fp16)[name = string("op_2338_cast_fp16")]; + tensor var_2339 = const()[name = string("op_2339"), val = tensor([0, 2, 1])]; + int32 var_2350 = const()[name = string("op_2350"), val = int32(-1)]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_17_cast_fp16 = transpose(perm = var_2339, x = var_2338_cast_fp16)[name = string("transpose_227")]; + tensor var_2352_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2352_cast_fp16")]; + bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; + tensor input_19_cast_fp16 = concat(axis = var_2350, interleave = input_19_interleave_0, values = (hidden_states_17_cast_fp16, var_2352_cast_fp16))[name = string("input_19_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_2347_to_fp16 = const()[name = string("op_2347_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2347_to_fp16, x = input_19_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_23_cast_fp16 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23_cast_fp16")]; + tensor var_2366_to_fp16 = const()[name = string("op_2366_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399051392)))]; + tensor hidden_states_19_cast_fp16 = mul(x = normed_23_cast_fp16, y = var_2366_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + int32 var_2420 = const()[name = string("op_2420"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2422_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2422_cast_fp16")]; + bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; + tensor input_21_cast_fp16 = concat(axis = var_2420, interleave = input_21_interleave_0, values = (hidden_states_21_cast_fp16, var_2422_cast_fp16))[name = string("input_21_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_2417_to_fp16 = const()[name = string("op_2417_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2417_to_fp16, x = input_21_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_27_cast_fp16 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27_cast_fp16")]; + tensor var_2436_to_fp16 = const()[name = string("op_2436_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399053760)))]; + tensor hidden_states_23_cast_fp16 = mul(x = normed_27_cast_fp16, y = var_2436_to_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor var_2447 = const()[name = string("op_2447"), val = tensor([0, 2, 1])]; + tensor var_2450_axes_0 = const()[name = string("op_2450_axes_0"), val = tensor([2])]; + tensor var_2448_cast_fp16 = transpose(perm = var_2447, x = hidden_states_23_cast_fp16)[name = string("transpose_226")]; + tensor var_2450_cast_fp16 = expand_dims(axes = var_2450_axes_0, x = var_2448_cast_fp16)[name = string("op_2450_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2450_cast_fp16)[name = string("query_states_9")]; + string key_states_11_pad_type_0 = const()[name = string("key_states_11_pad_type_0"), val = string("valid")]; + tensor key_states_11_strides_0 = const()[name = string("key_states_11_strides_0"), val = tensor([1, 1])]; + tensor key_states_11_pad_0 = const()[name = string("key_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_11_dilations_0 = const()[name = string("key_states_11_dilations_0"), val = tensor([1, 1])]; + int32 key_states_11_groups_0 = const()[name = string("key_states_11_groups_0"), val = int32(1)]; + tensor key_states_11 = conv(dilations = key_states_11_dilations_0, groups = key_states_11_groups_0, pad = key_states_11_pad_0, pad_type = key_states_11_pad_type_0, strides = key_states_11_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2450_cast_fp16)[name = string("key_states_11")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; + tensor value_states_9 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2450_cast_fp16)[name = string("value_states_9")]; + tensor var_2492 = const()[name = string("op_2492"), val = tensor([1, 4, 256, 64])]; + tensor var_2493 = reshape(shape = var_2492, x = query_states_9)[name = string("op_2493")]; + tensor var_2498 = const()[name = string("op_2498"), val = tensor([0, 1, 3, 2])]; + tensor var_2503 = const()[name = string("op_2503"), val = tensor([1, 1, 256, 64])]; + tensor var_2504 = reshape(shape = var_2503, x = key_states_11)[name = string("op_2504")]; + tensor var_2509 = const()[name = string("op_2509"), val = tensor([0, 1, 3, 2])]; + tensor var_2514 = const()[name = string("op_2514"), val = tensor([1, 1, 256, 64])]; + tensor var_2515 = reshape(shape = var_2514, x = value_states_9)[name = string("op_2515")]; + tensor var_2520 = const()[name = string("op_2520"), val = tensor([0, 1, 3, 2])]; + int32 var_2531 = const()[name = string("op_2531"), val = int32(-1)]; + fp16 const_48_promoted = const()[name = string("const_48_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_25 = transpose(perm = var_2498, x = var_2493)[name = string("transpose_225")]; + tensor var_2533 = mul(x = hidden_states_25, y = const_48_promoted)[name = string("op_2533")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25 = concat(axis = var_2531, interleave = input_25_interleave_0, values = (hidden_states_25, var_2533))[name = string("input_25")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_2528_to_fp16 = const()[name = string("op_2528_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2528_to_fp16, x = input_25)[name = string("normed_29_cast_fp16")]; + tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_31 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31")]; + tensor var_2547_to_fp16 = const()[name = string("op_2547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399056128)))]; + tensor q_3_cast_fp16 = mul(x = normed_31, y = var_2547_to_fp16)[name = string("q_3_cast_fp16")]; + int32 var_2558 = const()[name = string("op_2558"), val = int32(-1)]; + fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_27 = transpose(perm = var_2509, x = var_2504)[name = string("transpose_224")]; + tensor var_2560 = mul(x = hidden_states_27, y = const_52_promoted)[name = string("op_2560")]; + bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; + tensor input_27 = concat(axis = var_2558, interleave = input_27_interleave_0, values = (hidden_states_27, var_2560))[name = string("input_27")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_2555_to_fp16 = const()[name = string("op_2555_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2555_to_fp16, x = input_27)[name = string("normed_33_cast_fp16")]; + tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_35 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35")]; + tensor var_2574_to_fp16 = const()[name = string("op_2574_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399056704)))]; + tensor k_3_cast_fp16 = mul(x = normed_35, y = var_2574_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_2588_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_5)[name = string("op_2588_cast_fp16")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2609_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_2609_cast_fp16")]; + int32 var_2611 = const()[name = string("op_2611"), val = int32(-1)]; + bool var_2612_interleave_0 = const()[name = string("op_2612_interleave_0"), val = bool(false)]; + tensor var_2612_cast_fp16 = concat(axis = var_2611, interleave = var_2612_interleave_0, values = (var_2609_cast_fp16, x1_5_cast_fp16))[name = string("op_2612_cast_fp16")]; + tensor var_2613_cast_fp16 = mul(x = var_2612_cast_fp16, y = sin_5)[name = string("op_2613_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_2588_cast_fp16, y = var_2613_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor var_2616_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_5)[name = string("op_2616_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; + fp16 const_61_promoted_to_fp16 = const()[name = string("const_61_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2637_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_61_promoted_to_fp16)[name = string("op_2637_cast_fp16")]; + int32 var_2639 = const()[name = string("op_2639"), val = int32(-1)]; + bool var_2640_interleave_0 = const()[name = string("op_2640_interleave_0"), val = bool(false)]; + tensor var_2640_cast_fp16 = concat(axis = var_2639, interleave = var_2640_interleave_0, values = (var_2637_cast_fp16, x1_7_cast_fp16))[name = string("op_2640_cast_fp16")]; + tensor var_2641_cast_fp16 = mul(x = var_2640_cast_fp16, y = sin_5)[name = string("op_2641_cast_fp16")]; + tensor key_states_13_cast_fp16 = add(x = var_2616_cast_fp16, y = var_2641_cast_fp16)[name = string("key_states_13_cast_fp16")]; + tensor key_slice_3_begin_0 = const()[name = string("key_slice_3_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor key_slice_3_end_0 = const()[name = string("key_slice_3_end_0"), val = tensor([2, 1, 512, 256])]; + tensor key_slice_3_end_mask_0 = const()[name = string("key_slice_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_3_cast_fp16 = slice_by_index(begin = key_slice_3_begin_0, end = key_slice_3_end_0, end_mask = key_slice_3_end_mask_0, x = coreml_update_state_53)[name = string("key_slice_3_cast_fp16")]; + tensor var_2678_begin_0 = const()[name = string("op_2678_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2678_end_0 = const()[name = string("op_2678_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_2678_end_mask_0 = const()[name = string("op_2678_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2678_cast_fp16 = slice_by_index(begin = var_2678_begin_0, end = var_2678_end_0, end_mask = var_2678_end_mask_0, x = key_slice_3_cast_fp16)[name = string("op_2678_cast_fp16")]; + int32 var_2705 = const()[name = string("op_2705"), val = int32(2)]; + bool shifted_key_3_interleave_0 = const()[name = string("shifted_key_3_interleave_0"), val = bool(false)]; + tensor shifted_key_3_cast_fp16 = concat(axis = var_2705, interleave = shifted_key_3_interleave_0, values = (var_2678_cast_fp16, key_states_13_cast_fp16))[name = string("shifted_key_3_cast_fp16")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 0, 0, 0])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([2, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_3_stride_0, update = shifted_key_3_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_158_write_state")]; + tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_158")]; + tensor value_slice_3_begin_0 = const()[name = string("value_slice_3_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor value_slice_3_end_0 = const()[name = string("value_slice_3_end_0"), val = tensor([24, 1, 512, 256])]; + tensor value_slice_3_end_mask_0 = const()[name = string("value_slice_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_3_cast_fp16 = slice_by_index(begin = value_slice_3_begin_0, end = value_slice_3_end_0, end_mask = value_slice_3_end_mask_0, x = coreml_update_state_54)[name = string("value_slice_3_cast_fp16")]; + tensor var_2748_begin_0 = const()[name = string("op_2748_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2748_end_0 = const()[name = string("op_2748_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_2748_end_mask_0 = const()[name = string("op_2748_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2748_cast_fp16 = slice_by_index(begin = var_2748_begin_0, end = var_2748_end_0, end_mask = var_2748_end_mask_0, x = value_slice_3_cast_fp16)[name = string("op_2748_cast_fp16")]; + int32 var_2775 = const()[name = string("op_2775"), val = int32(2)]; + bool shifted_value_3_interleave_0 = const()[name = string("shifted_value_3_interleave_0"), val = bool(false)]; + tensor value_states_11 = transpose(perm = var_2520, x = var_2515)[name = string("transpose_223")]; + tensor shifted_value_3_cast_fp16 = concat(axis = var_2775, interleave = shifted_value_3_interleave_0, values = (var_2748_cast_fp16, value_states_11))[name = string("shifted_value_3_cast_fp16")]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([23, 0, 0, 0])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([24, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_16, begin_mask = model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0, end = concat_17, end_mask = model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_4_stride_0, update = shifted_value_3_cast_fp16, x = coreml_update_state_54)[name = string("model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_159_write_state")]; + tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_159")]; + tensor var_2803_begin_0 = const()[name = string("op_2803_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_2803_end_0 = const()[name = string("op_2803_end_0"), val = tensor([2, 1, 512, 256])]; + tensor var_2803_end_mask_0 = const()[name = string("op_2803_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2803_cast_fp16 = slice_by_index(begin = var_2803_begin_0, end = var_2803_end_0, end_mask = var_2803_end_mask_0, x = coreml_update_state_55)[name = string("op_2803_cast_fp16")]; + tensor var_2810_begin_0 = const()[name = string("op_2810_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_2810_end_0 = const()[name = string("op_2810_end_0"), val = tensor([24, 1, 512, 256])]; + tensor var_2810_end_mask_0 = const()[name = string("op_2810_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2810_cast_fp16 = slice_by_index(begin = var_2810_begin_0, end = var_2810_end_0, end_mask = var_2810_end_mask_0, x = coreml_update_state_55)[name = string("op_2810_cast_fp16")]; + tensor var_2849 = const()[name = string("op_2849"), val = tensor([1, 4, 1, 1])]; + tensor x_21_cast_fp16 = tile(reps = var_2849, x = var_2803_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor var_2869 = const()[name = string("op_2869"), val = tensor([1, 4, 1, 1])]; + tensor x_27_cast_fp16 = tile(reps = var_2869, x = var_2810_cast_fp16)[name = string("x_27_cast_fp16")]; + bool var_2896_transpose_x_0 = const()[name = string("op_2896_transpose_x_0"), val = bool(false)]; + bool var_2896_transpose_y_0 = const()[name = string("op_2896_transpose_y_0"), val = bool(true)]; + tensor var_2896 = matmul(transpose_x = var_2896_transpose_x_0, transpose_y = var_2896_transpose_y_0, x = query_states_11_cast_fp16, y = x_21_cast_fp16)[name = string("op_2896")]; + fp16 var_2897_to_fp16 = const()[name = string("op_2897_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_2896, y = var_2897_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask_slice_1)[name = string("attn_weights_7_cast_fp16")]; + int32 var_2932 = const()[name = string("op_2932"), val = int32(-1)]; + tensor var_2934_cast_fp16 = softmax(axis = var_2932, x = attn_weights_7_cast_fp16)[name = string("op_2934_cast_fp16")]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 64, 512])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_22, x = var_2934_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([4, 512, 256])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_23, x = x_27_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_27 = const()[name = string("concat_27"), val = tensor([1, 4, 64, 256])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_27, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_2946_perm_0 = const()[name = string("op_2946_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2965 = const()[name = string("op_2965"), val = tensor([1, 64, 1024])]; + tensor var_2946_cast_fp16 = transpose(perm = var_2946_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_222")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2965, x = var_2946_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2970 = const()[name = string("op_2970"), val = tensor([0, 2, 1])]; + string var_2986_pad_type_0 = const()[name = string("op_2986_pad_type_0"), val = string("valid")]; + int32 var_2986_groups_0 = const()[name = string("op_2986_groups_0"), val = int32(1)]; + tensor var_2986_strides_0 = const()[name = string("op_2986_strides_0"), val = tensor([1])]; + tensor var_2986_pad_0 = const()[name = string("op_2986_pad_0"), val = tensor([0, 0])]; + tensor var_2986_dilations_0 = const()[name = string("op_2986_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399057280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399942080))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2971_cast_fp16 = transpose(perm = var_2970, x = attn_output_15_cast_fp16)[name = string("transpose_221")]; + tensor var_2986_cast_fp16 = conv(dilations = var_2986_dilations_0, groups = var_2986_groups_0, pad = var_2986_pad_0, pad_type = var_2986_pad_type_0, strides = var_2986_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2971_cast_fp16)[name = string("op_2986_cast_fp16")]; + tensor var_2990 = const()[name = string("op_2990"), val = tensor([0, 2, 1])]; + int32 var_3001 = const()[name = string("op_3001"), val = int32(-1)]; + fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_29_cast_fp16 = transpose(perm = var_2990, x = var_2986_cast_fp16)[name = string("transpose_220")]; + tensor var_3003_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_3003_cast_fp16")]; + bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; + tensor input_31_cast_fp16 = concat(axis = var_3001, interleave = input_31_interleave_0, values = (hidden_states_29_cast_fp16, var_3003_cast_fp16))[name = string("input_31_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_2998_to_fp16 = const()[name = string("op_2998_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2998_to_fp16, x = input_31_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_39_cast_fp16 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39_cast_fp16")]; + tensor var_3017_to_fp16 = const()[name = string("op_3017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399979008)))]; + tensor attn_output_19_cast_fp16 = mul(x = normed_39_cast_fp16, y = var_3017_to_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + int32 var_3030 = const()[name = string("op_3030"), val = int32(-1)]; + fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3032_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_3032_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_3030, interleave = input_33_interleave_0, values = (hidden_states_31_cast_fp16, var_3032_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_3027_to_fp16 = const()[name = string("op_3027_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_3027_to_fp16, x = input_33_cast_fp16)[name = string("normed_41_cast_fp16")]; + tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_43_cast_fp16 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43_cast_fp16")]; + tensor var_3046_to_fp16 = const()[name = string("op_3046_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399981376)))]; + tensor x_29_cast_fp16 = mul(x = normed_43_cast_fp16, y = var_3046_to_fp16)[name = string("x_29_cast_fp16")]; + tensor var_3058 = const()[name = string("op_3058"), val = tensor([0, 2, 1])]; + tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; + tensor var_3059_cast_fp16 = transpose(perm = var_3058, x = x_29_cast_fp16)[name = string("transpose_219")]; + tensor input_35_cast_fp16 = expand_dims(axes = input_35_axes_0, x = var_3059_cast_fp16)[name = string("input_35_cast_fp16")]; + string x_31_pad_type_0 = const()[name = string("x_31_pad_type_0"), val = string("valid")]; + tensor x_31_strides_0 = const()[name = string("x_31_strides_0"), val = tensor([1, 1])]; + tensor x_31_pad_0 = const()[name = string("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_31_dilations_0 = const()[name = string("x_31_dilations_0"), val = tensor([1, 1])]; + int32 x_31_groups_0 = const()[name = string("x_31_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399983744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405955776))))[name = string("model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("x_31_cast_fp16")]; + string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; + tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; + tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; + int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406177024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412149056))))[name = string("model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_3_cast_fp16 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("b_3_cast_fp16")]; + string var_3084_mode_0 = const()[name = string("op_3084_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3084_cast_fp16 = gelu(mode = var_3084_mode_0, x = x_31_cast_fp16)[name = string("op_3084_cast_fp16")]; + tensor input_37_cast_fp16 = mul(x = var_3084_cast_fp16, y = b_3_cast_fp16)[name = string("input_37_cast_fp16")]; + string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; + tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; + tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; + int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; + tensor model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412370304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418342336))))[name = string("model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_3_cast_fp16 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_37_cast_fp16)[name = string("e_3_cast_fp16")]; + tensor var_3092_axes_0 = const()[name = string("op_3092_axes_0"), val = tensor([2])]; + tensor var_3092_cast_fp16 = squeeze(axes = var_3092_axes_0, x = e_3_cast_fp16)[name = string("op_3092_cast_fp16")]; + tensor var_3093 = const()[name = string("op_3093"), val = tensor([0, 2, 1])]; + int32 var_3104 = const()[name = string("op_3104"), val = int32(-1)]; + fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_33_cast_fp16 = transpose(perm = var_3093, x = var_3092_cast_fp16)[name = string("transpose_218")]; + tensor var_3106_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_3106_cast_fp16")]; + bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; + tensor input_39_cast_fp16 = concat(axis = var_3104, interleave = input_39_interleave_0, values = (hidden_states_33_cast_fp16, var_3106_cast_fp16))[name = string("input_39_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_3101_to_fp16 = const()[name = string("op_3101_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_3101_to_fp16, x = input_39_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; + tensor var_3120_to_fp16 = const()[name = string("op_3120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418379264)))]; + tensor hidden_states_35_cast_fp16 = mul(x = normed_47_cast_fp16, y = var_3120_to_fp16)[name = string("hidden_states_35_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + int32 var_3174 = const()[name = string("op_3174"), val = int32(-1)]; + fp16 const_85_promoted_to_fp16 = const()[name = string("const_85_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3176_cast_fp16 = mul(x = hidden_states_37_cast_fp16, y = const_85_promoted_to_fp16)[name = string("op_3176_cast_fp16")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41_cast_fp16 = concat(axis = var_3174, interleave = input_41_interleave_0, values = (hidden_states_37_cast_fp16, var_3176_cast_fp16))[name = string("input_41_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_3171_to_fp16 = const()[name = string("op_3171_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_3171_to_fp16, x = input_41_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; + tensor var_3190_to_fp16 = const()[name = string("op_3190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418381632)))]; + tensor hidden_states_39_cast_fp16 = mul(x = normed_51_cast_fp16, y = var_3190_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor var_3201 = const()[name = string("op_3201"), val = tensor([0, 2, 1])]; + tensor var_3204_axes_0 = const()[name = string("op_3204_axes_0"), val = tensor([2])]; + tensor var_3202_cast_fp16 = transpose(perm = var_3201, x = hidden_states_39_cast_fp16)[name = string("transpose_217")]; + tensor var_3204_cast_fp16 = expand_dims(axes = var_3204_axes_0, x = var_3202_cast_fp16)[name = string("op_3204_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_3204_cast_fp16)[name = string("query_states_17")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; + tensor key_states_21 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_3204_cast_fp16)[name = string("key_states_21")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor value_states_17 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_3204_cast_fp16)[name = string("value_states_17")]; + tensor var_3246 = const()[name = string("op_3246"), val = tensor([1, 4, 256, 64])]; + tensor var_3247 = reshape(shape = var_3246, x = query_states_17)[name = string("op_3247")]; + tensor var_3252 = const()[name = string("op_3252"), val = tensor([0, 1, 3, 2])]; + tensor var_3257 = const()[name = string("op_3257"), val = tensor([1, 1, 256, 64])]; + tensor var_3258 = reshape(shape = var_3257, x = key_states_21)[name = string("op_3258")]; + tensor var_3263 = const()[name = string("op_3263"), val = tensor([0, 1, 3, 2])]; + tensor var_3268 = const()[name = string("op_3268"), val = tensor([1, 1, 256, 64])]; + tensor var_3269 = reshape(shape = var_3268, x = value_states_17)[name = string("op_3269")]; + tensor var_3274 = const()[name = string("op_3274"), val = tensor([0, 1, 3, 2])]; + int32 var_3285 = const()[name = string("op_3285"), val = int32(-1)]; + fp16 const_90_promoted = const()[name = string("const_90_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_41 = transpose(perm = var_3252, x = var_3247)[name = string("transpose_216")]; + tensor var_3287 = mul(x = hidden_states_41, y = const_90_promoted)[name = string("op_3287")]; + bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; + tensor input_45 = concat(axis = var_3285, interleave = input_45_interleave_0, values = (hidden_states_41, var_3287))[name = string("input_45")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_3282_to_fp16 = const()[name = string("op_3282_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3282_to_fp16, x = input_45)[name = string("normed_53_cast_fp16")]; + tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; + tensor var_3301_to_fp16 = const()[name = string("op_3301_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418384000)))]; + tensor q_5_cast_fp16 = mul(x = normed_55, y = var_3301_to_fp16)[name = string("q_5_cast_fp16")]; + int32 var_3312 = const()[name = string("op_3312"), val = int32(-1)]; + fp16 const_94_promoted = const()[name = string("const_94_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_43 = transpose(perm = var_3263, x = var_3258)[name = string("transpose_215")]; + tensor var_3314 = mul(x = hidden_states_43, y = const_94_promoted)[name = string("op_3314")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47 = concat(axis = var_3312, interleave = input_47_interleave_0, values = (hidden_states_43, var_3314))[name = string("input_47")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_3309_to_fp16 = const()[name = string("op_3309_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3309_to_fp16, x = input_47)[name = string("normed_57_cast_fp16")]; + tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; + tensor var_3328_to_fp16 = const()[name = string("op_3328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418384576)))]; + tensor k_5_cast_fp16 = mul(x = normed_59, y = var_3328_to_fp16)[name = string("k_5_cast_fp16")]; + tensor var_3342_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_5)[name = string("op_3342_cast_fp16")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; + fp16 const_100_promoted_to_fp16 = const()[name = string("const_100_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3363_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_100_promoted_to_fp16)[name = string("op_3363_cast_fp16")]; + int32 var_3365 = const()[name = string("op_3365"), val = int32(-1)]; + bool var_3366_interleave_0 = const()[name = string("op_3366_interleave_0"), val = bool(false)]; + tensor var_3366_cast_fp16 = concat(axis = var_3365, interleave = var_3366_interleave_0, values = (var_3363_cast_fp16, x1_9_cast_fp16))[name = string("op_3366_cast_fp16")]; + tensor var_3367_cast_fp16 = mul(x = var_3366_cast_fp16, y = sin_5)[name = string("op_3367_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_3342_cast_fp16, y = var_3367_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor var_3370_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_5)[name = string("op_3370_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; + fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3391_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_3391_cast_fp16")]; + int32 var_3393 = const()[name = string("op_3393"), val = int32(-1)]; + bool var_3394_interleave_0 = const()[name = string("op_3394_interleave_0"), val = bool(false)]; + tensor var_3394_cast_fp16 = concat(axis = var_3393, interleave = var_3394_interleave_0, values = (var_3391_cast_fp16, x1_11_cast_fp16))[name = string("op_3394_cast_fp16")]; + tensor var_3395_cast_fp16 = mul(x = var_3394_cast_fp16, y = sin_5)[name = string("op_3395_cast_fp16")]; + tensor key_states_23_cast_fp16 = add(x = var_3370_cast_fp16, y = var_3395_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor key_slice_5_begin_0 = const()[name = string("key_slice_5_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor key_slice_5_end_0 = const()[name = string("key_slice_5_end_0"), val = tensor([3, 1, 512, 256])]; + tensor key_slice_5_end_mask_0 = const()[name = string("key_slice_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_5_cast_fp16 = slice_by_index(begin = key_slice_5_begin_0, end = key_slice_5_end_0, end_mask = key_slice_5_end_mask_0, x = coreml_update_state_55)[name = string("key_slice_5_cast_fp16")]; + tensor var_3432_begin_0 = const()[name = string("op_3432_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_3432_end_0 = const()[name = string("op_3432_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_3432_end_mask_0 = const()[name = string("op_3432_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3432_cast_fp16 = slice_by_index(begin = var_3432_begin_0, end = var_3432_end_0, end_mask = var_3432_end_mask_0, x = key_slice_5_cast_fp16)[name = string("op_3432_cast_fp16")]; + int32 var_3459 = const()[name = string("op_3459"), val = int32(2)]; + bool shifted_key_5_interleave_0 = const()[name = string("shifted_key_5_interleave_0"), val = bool(false)]; + tensor shifted_key_5_cast_fp16 = concat(axis = var_3459, interleave = shifted_key_5_interleave_0, values = (var_3432_cast_fp16, key_states_23_cast_fp16))[name = string("shifted_key_5_cast_fp16")]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([2, 0, 0, 0])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([3, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_28, begin_mask = model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0, end = concat_29, end_mask = model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_5_stride_0, update = shifted_key_5_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_160_write_state")]; + tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_160")]; + tensor value_slice_5_begin_0 = const()[name = string("value_slice_5_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor value_slice_5_end_0 = const()[name = string("value_slice_5_end_0"), val = tensor([25, 1, 512, 256])]; + tensor value_slice_5_end_mask_0 = const()[name = string("value_slice_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_5_cast_fp16 = slice_by_index(begin = value_slice_5_begin_0, end = value_slice_5_end_0, end_mask = value_slice_5_end_mask_0, x = coreml_update_state_56)[name = string("value_slice_5_cast_fp16")]; + tensor var_3502_begin_0 = const()[name = string("op_3502_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_3502_end_0 = const()[name = string("op_3502_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_3502_end_mask_0 = const()[name = string("op_3502_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3502_cast_fp16 = slice_by_index(begin = var_3502_begin_0, end = var_3502_end_0, end_mask = var_3502_end_mask_0, x = value_slice_5_cast_fp16)[name = string("op_3502_cast_fp16")]; + int32 var_3529 = const()[name = string("op_3529"), val = int32(2)]; + bool shifted_value_5_interleave_0 = const()[name = string("shifted_value_5_interleave_0"), val = bool(false)]; + tensor value_states_19 = transpose(perm = var_3274, x = var_3269)[name = string("transpose_214")]; + tensor shifted_value_5_cast_fp16 = concat(axis = var_3529, interleave = shifted_value_5_interleave_0, values = (var_3502_cast_fp16, value_states_19))[name = string("shifted_value_5_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([24, 0, 0, 0])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([25, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_6_stride_0, update = shifted_value_5_cast_fp16, x = coreml_update_state_56)[name = string("model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_161_write_state")]; + tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_161")]; + tensor var_3557_begin_0 = const()[name = string("op_3557_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_3557_end_0 = const()[name = string("op_3557_end_0"), val = tensor([3, 1, 512, 256])]; + tensor var_3557_end_mask_0 = const()[name = string("op_3557_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3557_cast_fp16 = slice_by_index(begin = var_3557_begin_0, end = var_3557_end_0, end_mask = var_3557_end_mask_0, x = coreml_update_state_57)[name = string("op_3557_cast_fp16")]; + tensor var_3564_begin_0 = const()[name = string("op_3564_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor var_3564_end_0 = const()[name = string("op_3564_end_0"), val = tensor([25, 1, 512, 256])]; + tensor var_3564_end_mask_0 = const()[name = string("op_3564_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3564_cast_fp16 = slice_by_index(begin = var_3564_begin_0, end = var_3564_end_0, end_mask = var_3564_end_mask_0, x = coreml_update_state_57)[name = string("op_3564_cast_fp16")]; + tensor var_3603 = const()[name = string("op_3603"), val = tensor([1, 4, 1, 1])]; + tensor x_37_cast_fp16 = tile(reps = var_3603, x = var_3557_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_3623 = const()[name = string("op_3623"), val = tensor([1, 4, 1, 1])]; + tensor x_43_cast_fp16 = tile(reps = var_3623, x = var_3564_cast_fp16)[name = string("x_43_cast_fp16")]; + bool var_3650_transpose_x_0 = const()[name = string("op_3650_transpose_x_0"), val = bool(false)]; + bool var_3650_transpose_y_0 = const()[name = string("op_3650_transpose_y_0"), val = bool(true)]; + tensor var_3650 = matmul(transpose_x = var_3650_transpose_x_0, transpose_y = var_3650_transpose_y_0, x = query_states_19_cast_fp16, y = x_37_cast_fp16)[name = string("op_3650")]; + fp16 var_3651_to_fp16 = const()[name = string("op_3651_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_3650, y = var_3651_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask_slice_1)[name = string("attn_weights_11_cast_fp16")]; + int32 var_3686 = const()[name = string("op_3686"), val = int32(-1)]; + tensor var_3688_cast_fp16 = softmax(axis = var_3686, x = attn_weights_11_cast_fp16)[name = string("op_3688_cast_fp16")]; + tensor concat_36 = const()[name = string("concat_36"), val = tensor([4, 64, 512])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_36, x = var_3688_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_37 = const()[name = string("concat_37"), val = tensor([4, 512, 256])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_37, x = x_43_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([1, 4, 64, 256])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_41, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_3700_perm_0 = const()[name = string("op_3700_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3719 = const()[name = string("op_3719"), val = tensor([1, 64, 1024])]; + tensor var_3700_cast_fp16 = transpose(perm = var_3700_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_213")]; + tensor attn_output_25_cast_fp16 = reshape(shape = var_3719, x = var_3700_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3724 = const()[name = string("op_3724"), val = tensor([0, 2, 1])]; + string var_3740_pad_type_0 = const()[name = string("op_3740_pad_type_0"), val = string("valid")]; + int32 var_3740_groups_0 = const()[name = string("op_3740_groups_0"), val = int32(1)]; + tensor var_3740_strides_0 = const()[name = string("op_3740_strides_0"), val = tensor([1])]; + tensor var_3740_pad_0 = const()[name = string("op_3740_pad_0"), val = tensor([0, 0])]; + tensor var_3740_dilations_0 = const()[name = string("op_3740_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418385152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419269952))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3725_cast_fp16 = transpose(perm = var_3724, x = attn_output_25_cast_fp16)[name = string("transpose_212")]; + tensor var_3740_cast_fp16 = conv(dilations = var_3740_dilations_0, groups = var_3740_groups_0, pad = var_3740_pad_0, pad_type = var_3740_pad_type_0, strides = var_3740_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3725_cast_fp16)[name = string("op_3740_cast_fp16")]; + tensor var_3744 = const()[name = string("op_3744"), val = tensor([0, 2, 1])]; + int32 var_3755 = const()[name = string("op_3755"), val = int32(-1)]; + fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_45_cast_fp16 = transpose(perm = var_3744, x = var_3740_cast_fp16)[name = string("transpose_211")]; + tensor var_3757_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3757_cast_fp16")]; + bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; + tensor input_51_cast_fp16 = concat(axis = var_3755, interleave = input_51_interleave_0, values = (hidden_states_45_cast_fp16, var_3757_cast_fp16))[name = string("input_51_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_3752_to_fp16 = const()[name = string("op_3752_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3752_to_fp16, x = input_51_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; + tensor var_3771_to_fp16 = const()[name = string("op_3771_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419306880)))]; + tensor attn_output_29_cast_fp16 = mul(x = normed_63_cast_fp16, y = var_3771_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; + int32 var_3784 = const()[name = string("op_3784"), val = int32(-1)]; + fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3786_cast_fp16 = mul(x = hidden_states_47_cast_fp16, y = const_118_promoted_to_fp16)[name = string("op_3786_cast_fp16")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53_cast_fp16 = concat(axis = var_3784, interleave = input_53_interleave_0, values = (hidden_states_47_cast_fp16, var_3786_cast_fp16))[name = string("input_53_cast_fp16")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_3781_to_fp16 = const()[name = string("op_3781_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3781_to_fp16, x = input_53_cast_fp16)[name = string("normed_65_cast_fp16")]; + tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; + tensor var_3800_to_fp16 = const()[name = string("op_3800_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419309248)))]; + tensor x_45_cast_fp16 = mul(x = normed_67_cast_fp16, y = var_3800_to_fp16)[name = string("x_45_cast_fp16")]; + tensor var_3812 = const()[name = string("op_3812"), val = tensor([0, 2, 1])]; + tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; + tensor var_3813_cast_fp16 = transpose(perm = var_3812, x = x_45_cast_fp16)[name = string("transpose_210")]; + tensor input_55_cast_fp16 = expand_dims(axes = input_55_axes_0, x = var_3813_cast_fp16)[name = string("input_55_cast_fp16")]; + string x_47_pad_type_0 = const()[name = string("x_47_pad_type_0"), val = string("valid")]; + tensor x_47_strides_0 = const()[name = string("x_47_strides_0"), val = tensor([1, 1])]; + tensor x_47_pad_0 = const()[name = string("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_47_dilations_0 = const()[name = string("x_47_dilations_0"), val = tensor([1, 1])]; + int32 x_47_groups_0 = const()[name = string("x_47_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419311616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425283648))))[name = string("model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("x_47_cast_fp16")]; + string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; + tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; + tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; + int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425504896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431476928))))[name = string("model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_5_cast_fp16 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("b_5_cast_fp16")]; + string var_3838_mode_0 = const()[name = string("op_3838_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3838_cast_fp16 = gelu(mode = var_3838_mode_0, x = x_47_cast_fp16)[name = string("op_3838_cast_fp16")]; + tensor input_57_cast_fp16 = mul(x = var_3838_cast_fp16, y = b_5_cast_fp16)[name = string("input_57_cast_fp16")]; + string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; + tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; + tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; + int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; + tensor model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431698176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437670208))))[name = string("model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_5_cast_fp16 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("e_5_cast_fp16")]; + tensor var_3846_axes_0 = const()[name = string("op_3846_axes_0"), val = tensor([2])]; + tensor var_3846_cast_fp16 = squeeze(axes = var_3846_axes_0, x = e_5_cast_fp16)[name = string("op_3846_cast_fp16")]; + tensor var_3847 = const()[name = string("op_3847"), val = tensor([0, 2, 1])]; + int32 var_3858 = const()[name = string("op_3858"), val = int32(-1)]; + fp16 const_122_promoted_to_fp16 = const()[name = string("const_122_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_49_cast_fp16 = transpose(perm = var_3847, x = var_3846_cast_fp16)[name = string("transpose_209")]; + tensor var_3860_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_122_promoted_to_fp16)[name = string("op_3860_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_3858, interleave = input_59_interleave_0, values = (hidden_states_49_cast_fp16, var_3860_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_3855_to_fp16 = const()[name = string("op_3855_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3855_to_fp16, x = input_59_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_71_cast_fp16 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71_cast_fp16")]; + tensor var_3874_to_fp16 = const()[name = string("op_3874_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437707136)))]; + tensor hidden_states_51_cast_fp16 = mul(x = normed_71_cast_fp16, y = var_3874_to_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + int32 var_3928 = const()[name = string("op_3928"), val = int32(-1)]; + fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3930_cast_fp16 = mul(x = hidden_states_53_cast_fp16, y = const_127_promoted_to_fp16)[name = string("op_3930_cast_fp16")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61_cast_fp16 = concat(axis = var_3928, interleave = input_61_interleave_0, values = (hidden_states_53_cast_fp16, var_3930_cast_fp16))[name = string("input_61_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_3925_to_fp16 = const()[name = string("op_3925_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3925_to_fp16, x = input_61_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_75_cast_fp16 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75_cast_fp16")]; + tensor var_3944_to_fp16 = const()[name = string("op_3944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437709504)))]; + tensor hidden_states_55_cast_fp16 = mul(x = normed_75_cast_fp16, y = var_3944_to_fp16)[name = string("hidden_states_55_cast_fp16")]; + tensor var_3955 = const()[name = string("op_3955"), val = tensor([0, 2, 1])]; + tensor var_3958_axes_0 = const()[name = string("op_3958_axes_0"), val = tensor([2])]; + tensor var_3956_cast_fp16 = transpose(perm = var_3955, x = hidden_states_55_cast_fp16)[name = string("transpose_208")]; + tensor var_3958_cast_fp16 = expand_dims(axes = var_3958_axes_0, x = var_3956_cast_fp16)[name = string("op_3958_cast_fp16")]; + string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; + tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; + tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; + int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; + tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3958_cast_fp16)[name = string("query_states_25")]; + string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; + tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; + tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; + int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; + tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3958_cast_fp16)[name = string("key_states_31")]; + string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; + tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; + tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; + int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; + tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3958_cast_fp16)[name = string("value_states_25")]; + tensor var_4000 = const()[name = string("op_4000"), val = tensor([1, 4, 256, 64])]; + tensor var_4001 = reshape(shape = var_4000, x = query_states_25)[name = string("op_4001")]; + tensor var_4006 = const()[name = string("op_4006"), val = tensor([0, 1, 3, 2])]; + tensor var_4011 = const()[name = string("op_4011"), val = tensor([1, 1, 256, 64])]; + tensor var_4012 = reshape(shape = var_4011, x = key_states_31)[name = string("op_4012")]; + tensor var_4017 = const()[name = string("op_4017"), val = tensor([0, 1, 3, 2])]; + tensor var_4022 = const()[name = string("op_4022"), val = tensor([1, 1, 256, 64])]; + tensor var_4023 = reshape(shape = var_4022, x = value_states_25)[name = string("op_4023")]; + tensor var_4028 = const()[name = string("op_4028"), val = tensor([0, 1, 3, 2])]; + int32 var_4039 = const()[name = string("op_4039"), val = int32(-1)]; + fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_57 = transpose(perm = var_4006, x = var_4001)[name = string("transpose_207")]; + tensor var_4041 = mul(x = hidden_states_57, y = const_132_promoted)[name = string("op_4041")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_4039, interleave = input_65_interleave_0, values = (hidden_states_57, var_4041))[name = string("input_65")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_4036_to_fp16 = const()[name = string("op_4036_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_4036_to_fp16, x = input_65)[name = string("normed_77_cast_fp16")]; + tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_79 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79")]; + tensor var_4055_to_fp16 = const()[name = string("op_4055_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437711872)))]; + tensor q_7_cast_fp16 = mul(x = normed_79, y = var_4055_to_fp16)[name = string("q_7_cast_fp16")]; + int32 var_4066 = const()[name = string("op_4066"), val = int32(-1)]; + fp16 const_136_promoted = const()[name = string("const_136_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_59 = transpose(perm = var_4017, x = var_4012)[name = string("transpose_206")]; + tensor var_4068 = mul(x = hidden_states_59, y = const_136_promoted)[name = string("op_4068")]; + bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; + tensor input_67 = concat(axis = var_4066, interleave = input_67_interleave_0, values = (hidden_states_59, var_4068))[name = string("input_67")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_4063_to_fp16 = const()[name = string("op_4063_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_4063_to_fp16, x = input_67)[name = string("normed_81_cast_fp16")]; + tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_83 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83")]; + tensor var_4082_to_fp16 = const()[name = string("op_4082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437712448)))]; + tensor k_7_cast_fp16 = mul(x = normed_83, y = var_4082_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_4096_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_5)[name = string("op_4096_cast_fp16")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; + fp16 const_142_promoted_to_fp16 = const()[name = string("const_142_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4117_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_142_promoted_to_fp16)[name = string("op_4117_cast_fp16")]; + int32 var_4119 = const()[name = string("op_4119"), val = int32(-1)]; + bool var_4120_interleave_0 = const()[name = string("op_4120_interleave_0"), val = bool(false)]; + tensor var_4120_cast_fp16 = concat(axis = var_4119, interleave = var_4120_interleave_0, values = (var_4117_cast_fp16, x1_13_cast_fp16))[name = string("op_4120_cast_fp16")]; + tensor var_4121_cast_fp16 = mul(x = var_4120_cast_fp16, y = sin_5)[name = string("op_4121_cast_fp16")]; + tensor query_states_27_cast_fp16 = add(x = var_4096_cast_fp16, y = var_4121_cast_fp16)[name = string("query_states_27_cast_fp16")]; + tensor var_4124_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_5)[name = string("op_4124_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; + fp16 const_145_promoted_to_fp16 = const()[name = string("const_145_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4145_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_145_promoted_to_fp16)[name = string("op_4145_cast_fp16")]; + int32 var_4147 = const()[name = string("op_4147"), val = int32(-1)]; + bool var_4148_interleave_0 = const()[name = string("op_4148_interleave_0"), val = bool(false)]; + tensor var_4148_cast_fp16 = concat(axis = var_4147, interleave = var_4148_interleave_0, values = (var_4145_cast_fp16, x1_15_cast_fp16))[name = string("op_4148_cast_fp16")]; + tensor var_4149_cast_fp16 = mul(x = var_4148_cast_fp16, y = sin_5)[name = string("op_4149_cast_fp16")]; + tensor key_states_33_cast_fp16 = add(x = var_4124_cast_fp16, y = var_4149_cast_fp16)[name = string("key_states_33_cast_fp16")]; + tensor key_slice_7_begin_0 = const()[name = string("key_slice_7_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor key_slice_7_end_0 = const()[name = string("key_slice_7_end_0"), val = tensor([4, 1, 512, 256])]; + tensor key_slice_7_end_mask_0 = const()[name = string("key_slice_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_7_cast_fp16 = slice_by_index(begin = key_slice_7_begin_0, end = key_slice_7_end_0, end_mask = key_slice_7_end_mask_0, x = coreml_update_state_57)[name = string("key_slice_7_cast_fp16")]; + tensor var_4186_begin_0 = const()[name = string("op_4186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4186_end_0 = const()[name = string("op_4186_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_4186_end_mask_0 = const()[name = string("op_4186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4186_cast_fp16 = slice_by_index(begin = var_4186_begin_0, end = var_4186_end_0, end_mask = var_4186_end_mask_0, x = key_slice_7_cast_fp16)[name = string("op_4186_cast_fp16")]; + int32 var_4213 = const()[name = string("op_4213"), val = int32(2)]; + bool shifted_key_7_interleave_0 = const()[name = string("shifted_key_7_interleave_0"), val = bool(false)]; + tensor shifted_key_7_cast_fp16 = concat(axis = var_4213, interleave = shifted_key_7_interleave_0, values = (var_4186_cast_fp16, key_states_33_cast_fp16))[name = string("shifted_key_7_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([3, 0, 0, 0])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([4, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_7_stride_0, update = shifted_key_7_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_162_write_state")]; + tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_162")]; + tensor value_slice_7_begin_0 = const()[name = string("value_slice_7_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor value_slice_7_end_0 = const()[name = string("value_slice_7_end_0"), val = tensor([26, 1, 512, 256])]; + tensor value_slice_7_end_mask_0 = const()[name = string("value_slice_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_7_cast_fp16 = slice_by_index(begin = value_slice_7_begin_0, end = value_slice_7_end_0, end_mask = value_slice_7_end_mask_0, x = coreml_update_state_58)[name = string("value_slice_7_cast_fp16")]; + tensor var_4256_begin_0 = const()[name = string("op_4256_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4256_end_0 = const()[name = string("op_4256_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_4256_end_mask_0 = const()[name = string("op_4256_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4256_cast_fp16 = slice_by_index(begin = var_4256_begin_0, end = var_4256_end_0, end_mask = var_4256_end_mask_0, x = value_slice_7_cast_fp16)[name = string("op_4256_cast_fp16")]; + int32 var_4283 = const()[name = string("op_4283"), val = int32(2)]; + bool shifted_value_7_interleave_0 = const()[name = string("shifted_value_7_interleave_0"), val = bool(false)]; + tensor value_states_27 = transpose(perm = var_4028, x = var_4023)[name = string("transpose_205")]; + tensor shifted_value_7_cast_fp16 = concat(axis = var_4283, interleave = shifted_value_7_interleave_0, values = (var_4256_cast_fp16, value_states_27))[name = string("shifted_value_7_cast_fp16")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([25, 0, 0, 0])]; + tensor concat_45 = const()[name = string("concat_45"), val = tensor([26, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_44, begin_mask = model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0, end = concat_45, end_mask = model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_8_stride_0, update = shifted_value_7_cast_fp16, x = coreml_update_state_58)[name = string("model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_163_write_state")]; + tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_163")]; + tensor var_4311_begin_0 = const()[name = string("op_4311_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_4311_end_0 = const()[name = string("op_4311_end_0"), val = tensor([4, 1, 512, 256])]; + tensor var_4311_end_mask_0 = const()[name = string("op_4311_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4311_cast_fp16 = slice_by_index(begin = var_4311_begin_0, end = var_4311_end_0, end_mask = var_4311_end_mask_0, x = coreml_update_state_59)[name = string("op_4311_cast_fp16")]; + tensor var_4318_begin_0 = const()[name = string("op_4318_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor var_4318_end_0 = const()[name = string("op_4318_end_0"), val = tensor([26, 1, 512, 256])]; + tensor var_4318_end_mask_0 = const()[name = string("op_4318_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4318_cast_fp16 = slice_by_index(begin = var_4318_begin_0, end = var_4318_end_0, end_mask = var_4318_end_mask_0, x = coreml_update_state_59)[name = string("op_4318_cast_fp16")]; + tensor var_4357 = const()[name = string("op_4357"), val = tensor([1, 4, 1, 1])]; + tensor x_53_cast_fp16 = tile(reps = var_4357, x = var_4311_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_4377 = const()[name = string("op_4377"), val = tensor([1, 4, 1, 1])]; + tensor x_59_cast_fp16 = tile(reps = var_4377, x = var_4318_cast_fp16)[name = string("x_59_cast_fp16")]; + bool var_4404_transpose_x_0 = const()[name = string("op_4404_transpose_x_0"), val = bool(false)]; + bool var_4404_transpose_y_0 = const()[name = string("op_4404_transpose_y_0"), val = bool(true)]; + tensor var_4404 = matmul(transpose_x = var_4404_transpose_x_0, transpose_y = var_4404_transpose_y_0, x = query_states_27_cast_fp16, y = x_53_cast_fp16)[name = string("op_4404")]; + fp16 var_4405_to_fp16 = const()[name = string("op_4405_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_4404, y = var_4405_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = mask_slice_1)[name = string("attn_weights_15_cast_fp16")]; + int32 var_4440 = const()[name = string("op_4440"), val = int32(-1)]; + tensor var_4442_cast_fp16 = softmax(axis = var_4440, x = attn_weights_15_cast_fp16)[name = string("op_4442_cast_fp16")]; + tensor concat_50 = const()[name = string("concat_50"), val = tensor([4, 64, 512])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_50, x = var_4442_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_51 = const()[name = string("concat_51"), val = tensor([4, 512, 256])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_51, x = x_59_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_55 = const()[name = string("concat_55"), val = tensor([1, 4, 64, 256])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_55, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_4454_perm_0 = const()[name = string("op_4454_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4473 = const()[name = string("op_4473"), val = tensor([1, 64, 1024])]; + tensor var_4454_cast_fp16 = transpose(perm = var_4454_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_204")]; + tensor attn_output_35_cast_fp16 = reshape(shape = var_4473, x = var_4454_cast_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor var_4478 = const()[name = string("op_4478"), val = tensor([0, 2, 1])]; + string var_4494_pad_type_0 = const()[name = string("op_4494_pad_type_0"), val = string("valid")]; + int32 var_4494_groups_0 = const()[name = string("op_4494_groups_0"), val = int32(1)]; + tensor var_4494_strides_0 = const()[name = string("op_4494_strides_0"), val = tensor([1])]; + tensor var_4494_pad_0 = const()[name = string("op_4494_pad_0"), val = tensor([0, 0])]; + tensor var_4494_dilations_0 = const()[name = string("op_4494_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437713024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438597824))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4479_cast_fp16 = transpose(perm = var_4478, x = attn_output_35_cast_fp16)[name = string("transpose_203")]; + tensor var_4494_cast_fp16 = conv(dilations = var_4494_dilations_0, groups = var_4494_groups_0, pad = var_4494_pad_0, pad_type = var_4494_pad_type_0, strides = var_4494_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_4479_cast_fp16)[name = string("op_4494_cast_fp16")]; + tensor var_4498 = const()[name = string("op_4498"), val = tensor([0, 2, 1])]; + int32 var_4509 = const()[name = string("op_4509"), val = int32(-1)]; + fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_61_cast_fp16 = transpose(perm = var_4498, x = var_4494_cast_fp16)[name = string("transpose_202")]; + tensor var_4511_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_156_promoted_to_fp16)[name = string("op_4511_cast_fp16")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_4509, interleave = input_71_interleave_0, values = (hidden_states_61_cast_fp16, var_4511_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_4506_to_fp16 = const()[name = string("op_4506_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_4506_to_fp16, x = input_71_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_87_cast_fp16 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87_cast_fp16")]; + tensor var_4525_to_fp16 = const()[name = string("op_4525_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438634752)))]; + tensor attn_output_39_cast_fp16 = mul(x = normed_87_cast_fp16, y = var_4525_to_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; + int32 var_4538 = const()[name = string("op_4538"), val = int32(-1)]; + fp16 const_160_promoted_to_fp16 = const()[name = string("const_160_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4540_cast_fp16 = mul(x = hidden_states_63_cast_fp16, y = const_160_promoted_to_fp16)[name = string("op_4540_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_4538, interleave = input_73_interleave_0, values = (hidden_states_63_cast_fp16, var_4540_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_4535_to_fp16 = const()[name = string("op_4535_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4535_to_fp16, x = input_73_cast_fp16)[name = string("normed_89_cast_fp16")]; + tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_91_cast_fp16 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91_cast_fp16")]; + tensor var_4554_to_fp16 = const()[name = string("op_4554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438637120)))]; + tensor x_61_cast_fp16 = mul(x = normed_91_cast_fp16, y = var_4554_to_fp16)[name = string("x_61_cast_fp16")]; + tensor var_4566 = const()[name = string("op_4566"), val = tensor([0, 2, 1])]; + tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; + tensor var_4567_cast_fp16 = transpose(perm = var_4566, x = x_61_cast_fp16)[name = string("transpose_201")]; + tensor input_75_cast_fp16 = expand_dims(axes = input_75_axes_0, x = var_4567_cast_fp16)[name = string("input_75_cast_fp16")]; + string x_63_pad_type_0 = const()[name = string("x_63_pad_type_0"), val = string("valid")]; + tensor x_63_strides_0 = const()[name = string("x_63_strides_0"), val = tensor([1, 1])]; + tensor x_63_pad_0 = const()[name = string("x_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_63_dilations_0 = const()[name = string("x_63_dilations_0"), val = tensor([1, 1])]; + int32 x_63_groups_0 = const()[name = string("x_63_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438639488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444611520))))[name = string("model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_63_cast_fp16 = conv(dilations = x_63_dilations_0, groups = x_63_groups_0, pad = x_63_pad_0, pad_type = x_63_pad_type_0, strides = x_63_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("x_63_cast_fp16")]; + string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; + tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; + tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; + int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444832768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450804800))))[name = string("model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_7_cast_fp16 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("b_7_cast_fp16")]; + string var_4592_mode_0 = const()[name = string("op_4592_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4592_cast_fp16 = gelu(mode = var_4592_mode_0, x = x_63_cast_fp16)[name = string("op_4592_cast_fp16")]; + tensor input_77_cast_fp16 = mul(x = var_4592_cast_fp16, y = b_7_cast_fp16)[name = string("input_77_cast_fp16")]; + string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; + tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; + tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; + int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; + tensor model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451026048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456998080))))[name = string("model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_7_cast_fp16 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_77_cast_fp16)[name = string("e_7_cast_fp16")]; + tensor var_4600_axes_0 = const()[name = string("op_4600_axes_0"), val = tensor([2])]; + tensor var_4600_cast_fp16 = squeeze(axes = var_4600_axes_0, x = e_7_cast_fp16)[name = string("op_4600_cast_fp16")]; + tensor var_4601 = const()[name = string("op_4601"), val = tensor([0, 2, 1])]; + int32 var_4612 = const()[name = string("op_4612"), val = int32(-1)]; + fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_65_cast_fp16 = transpose(perm = var_4601, x = var_4600_cast_fp16)[name = string("transpose_200")]; + tensor var_4614_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4614_cast_fp16")]; + bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; + tensor input_79_cast_fp16 = concat(axis = var_4612, interleave = input_79_interleave_0, values = (hidden_states_65_cast_fp16, var_4614_cast_fp16))[name = string("input_79_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_4609_to_fp16 = const()[name = string("op_4609_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4609_to_fp16, x = input_79_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; + tensor var_4628_to_fp16 = const()[name = string("op_4628_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457035008)))]; + tensor hidden_states_67_cast_fp16 = mul(x = normed_95_cast_fp16, y = var_4628_to_fp16)[name = string("hidden_states_67_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_63_cast_fp16, y = hidden_states_67_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + int32 var_4682 = const()[name = string("op_4682"), val = int32(-1)]; + fp16 const_169_promoted_to_fp16 = const()[name = string("const_169_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4684_cast_fp16 = mul(x = hidden_states_69_cast_fp16, y = const_169_promoted_to_fp16)[name = string("op_4684_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_4682, interleave = input_81_interleave_0, values = (hidden_states_69_cast_fp16, var_4684_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_4679_to_fp16 = const()[name = string("op_4679_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4679_to_fp16, x = input_81_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; + tensor var_4698_to_fp16 = const()[name = string("op_4698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457037376)))]; + tensor hidden_states_71_cast_fp16 = mul(x = normed_99_cast_fp16, y = var_4698_to_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor var_4709 = const()[name = string("op_4709"), val = tensor([0, 2, 1])]; + tensor var_4712_axes_0 = const()[name = string("op_4712_axes_0"), val = tensor([2])]; + tensor var_4710_cast_fp16 = transpose(perm = var_4709, x = hidden_states_71_cast_fp16)[name = string("transpose_199")]; + tensor var_4712_cast_fp16 = expand_dims(axes = var_4712_axes_0, x = var_4710_cast_fp16)[name = string("op_4712_cast_fp16")]; + string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; + tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; + tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; + int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; + tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_4712_cast_fp16)[name = string("query_states_33")]; + string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; + tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; + tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; + int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; + tensor key_states_41 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_4712_cast_fp16)[name = string("key_states_41")]; + string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; + tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; + tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; + int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; + tensor value_states_33 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_4712_cast_fp16)[name = string("value_states_33")]; + tensor var_4754 = const()[name = string("op_4754"), val = tensor([1, 4, 256, 64])]; + tensor var_4755 = reshape(shape = var_4754, x = query_states_33)[name = string("op_4755")]; + tensor var_4760 = const()[name = string("op_4760"), val = tensor([0, 1, 3, 2])]; + tensor var_4765 = const()[name = string("op_4765"), val = tensor([1, 1, 256, 64])]; + tensor var_4766 = reshape(shape = var_4765, x = key_states_41)[name = string("op_4766")]; + tensor var_4771 = const()[name = string("op_4771"), val = tensor([0, 1, 3, 2])]; + tensor var_4776 = const()[name = string("op_4776"), val = tensor([1, 1, 256, 64])]; + tensor var_4777 = reshape(shape = var_4776, x = value_states_33)[name = string("op_4777")]; + tensor var_4782 = const()[name = string("op_4782"), val = tensor([0, 1, 3, 2])]; + int32 var_4793 = const()[name = string("op_4793"), val = int32(-1)]; + fp16 const_174_promoted = const()[name = string("const_174_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_73 = transpose(perm = var_4760, x = var_4755)[name = string("transpose_198")]; + tensor var_4795 = mul(x = hidden_states_73, y = const_174_promoted)[name = string("op_4795")]; + bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; + tensor input_85 = concat(axis = var_4793, interleave = input_85_interleave_0, values = (hidden_states_73, var_4795))[name = string("input_85")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_4790_to_fp16 = const()[name = string("op_4790_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4790_to_fp16, x = input_85)[name = string("normed_101_cast_fp16")]; + tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; + tensor var_4809_to_fp16 = const()[name = string("op_4809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457039744)))]; + tensor q_9_cast_fp16 = mul(x = normed_103, y = var_4809_to_fp16)[name = string("q_9_cast_fp16")]; + int32 var_4820 = const()[name = string("op_4820"), val = int32(-1)]; + fp16 const_178_promoted = const()[name = string("const_178_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_75 = transpose(perm = var_4771, x = var_4766)[name = string("transpose_197")]; + tensor var_4822 = mul(x = hidden_states_75, y = const_178_promoted)[name = string("op_4822")]; + bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; + tensor input_87 = concat(axis = var_4820, interleave = input_87_interleave_0, values = (hidden_states_75, var_4822))[name = string("input_87")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_4817_to_fp16 = const()[name = string("op_4817_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4817_to_fp16, x = input_87)[name = string("normed_105_cast_fp16")]; + tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; + tensor var_4836_to_fp16 = const()[name = string("op_4836_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457040320)))]; + tensor k_9_cast_fp16 = mul(x = normed_107, y = var_4836_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_4850_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_5)[name = string("op_4850_cast_fp16")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; + fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4871_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_4871_cast_fp16")]; + int32 var_4873 = const()[name = string("op_4873"), val = int32(-1)]; + bool var_4874_interleave_0 = const()[name = string("op_4874_interleave_0"), val = bool(false)]; + tensor var_4874_cast_fp16 = concat(axis = var_4873, interleave = var_4874_interleave_0, values = (var_4871_cast_fp16, x1_17_cast_fp16))[name = string("op_4874_cast_fp16")]; + tensor var_4875_cast_fp16 = mul(x = var_4874_cast_fp16, y = sin_5)[name = string("op_4875_cast_fp16")]; + tensor query_states_35_cast_fp16 = add(x = var_4850_cast_fp16, y = var_4875_cast_fp16)[name = string("query_states_35_cast_fp16")]; + tensor var_4878_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_5)[name = string("op_4878_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; + fp16 const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4899_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_187_promoted_to_fp16)[name = string("op_4899_cast_fp16")]; + int32 var_4901 = const()[name = string("op_4901"), val = int32(-1)]; + bool var_4902_interleave_0 = const()[name = string("op_4902_interleave_0"), val = bool(false)]; + tensor var_4902_cast_fp16 = concat(axis = var_4901, interleave = var_4902_interleave_0, values = (var_4899_cast_fp16, x1_19_cast_fp16))[name = string("op_4902_cast_fp16")]; + tensor var_4903_cast_fp16 = mul(x = var_4902_cast_fp16, y = sin_5)[name = string("op_4903_cast_fp16")]; + tensor key_states_43_cast_fp16 = add(x = var_4878_cast_fp16, y = var_4903_cast_fp16)[name = string("key_states_43_cast_fp16")]; + tensor key_slice_9_begin_0 = const()[name = string("key_slice_9_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor key_slice_9_end_0 = const()[name = string("key_slice_9_end_0"), val = tensor([5, 1, 512, 256])]; + tensor key_slice_9_end_mask_0 = const()[name = string("key_slice_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_9_cast_fp16 = slice_by_index(begin = key_slice_9_begin_0, end = key_slice_9_end_0, end_mask = key_slice_9_end_mask_0, x = coreml_update_state_59)[name = string("key_slice_9_cast_fp16")]; + tensor var_4940_begin_0 = const()[name = string("op_4940_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4940_end_0 = const()[name = string("op_4940_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_4940_end_mask_0 = const()[name = string("op_4940_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4940_cast_fp16 = slice_by_index(begin = var_4940_begin_0, end = var_4940_end_0, end_mask = var_4940_end_mask_0, x = key_slice_9_cast_fp16)[name = string("op_4940_cast_fp16")]; + int32 var_4967 = const()[name = string("op_4967"), val = int32(2)]; + bool shifted_key_9_interleave_0 = const()[name = string("shifted_key_9_interleave_0"), val = bool(false)]; + tensor shifted_key_9_cast_fp16 = concat(axis = var_4967, interleave = shifted_key_9_interleave_0, values = (var_4940_cast_fp16, key_states_43_cast_fp16))[name = string("shifted_key_9_cast_fp16")]; + tensor concat_56 = const()[name = string("concat_56"), val = tensor([4, 0, 0, 0])]; + tensor concat_57 = const()[name = string("concat_57"), val = tensor([5, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_9_stride_0, update = shifted_key_9_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_164_write_state")]; + tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_164")]; + tensor value_slice_9_begin_0 = const()[name = string("value_slice_9_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor value_slice_9_end_0 = const()[name = string("value_slice_9_end_0"), val = tensor([27, 1, 512, 256])]; + tensor value_slice_9_end_mask_0 = const()[name = string("value_slice_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_9_cast_fp16 = slice_by_index(begin = value_slice_9_begin_0, end = value_slice_9_end_0, end_mask = value_slice_9_end_mask_0, x = coreml_update_state_60)[name = string("value_slice_9_cast_fp16")]; + tensor var_5010_begin_0 = const()[name = string("op_5010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_5010_end_0 = const()[name = string("op_5010_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_5010_end_mask_0 = const()[name = string("op_5010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5010_cast_fp16 = slice_by_index(begin = var_5010_begin_0, end = var_5010_end_0, end_mask = var_5010_end_mask_0, x = value_slice_9_cast_fp16)[name = string("op_5010_cast_fp16")]; + int32 var_5037 = const()[name = string("op_5037"), val = int32(2)]; + bool shifted_value_9_interleave_0 = const()[name = string("shifted_value_9_interleave_0"), val = bool(false)]; + tensor value_states_35 = transpose(perm = var_4782, x = var_4777)[name = string("transpose_196")]; + tensor shifted_value_9_cast_fp16 = concat(axis = var_5037, interleave = shifted_value_9_interleave_0, values = (var_5010_cast_fp16, value_states_35))[name = string("shifted_value_9_cast_fp16")]; + tensor concat_58 = const()[name = string("concat_58"), val = tensor([26, 0, 0, 0])]; + tensor concat_59 = const()[name = string("concat_59"), val = tensor([27, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_10_stride_0, update = shifted_value_9_cast_fp16, x = coreml_update_state_60)[name = string("model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_165_write_state")]; + tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_165")]; + tensor var_5065_begin_0 = const()[name = string("op_5065_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_5065_end_0 = const()[name = string("op_5065_end_0"), val = tensor([5, 1, 512, 256])]; + tensor var_5065_end_mask_0 = const()[name = string("op_5065_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5065_cast_fp16 = slice_by_index(begin = var_5065_begin_0, end = var_5065_end_0, end_mask = var_5065_end_mask_0, x = coreml_update_state_61)[name = string("op_5065_cast_fp16")]; + tensor var_5072_begin_0 = const()[name = string("op_5072_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor var_5072_end_0 = const()[name = string("op_5072_end_0"), val = tensor([27, 1, 512, 256])]; + tensor var_5072_end_mask_0 = const()[name = string("op_5072_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5072_cast_fp16 = slice_by_index(begin = var_5072_begin_0, end = var_5072_end_0, end_mask = var_5072_end_mask_0, x = coreml_update_state_61)[name = string("op_5072_cast_fp16")]; + tensor var_5111 = const()[name = string("op_5111"), val = tensor([1, 4, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_5111, x = var_5065_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_5131 = const()[name = string("op_5131"), val = tensor([1, 4, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_5131, x = var_5072_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_5158_transpose_x_0 = const()[name = string("op_5158_transpose_x_0"), val = bool(false)]; + bool var_5158_transpose_y_0 = const()[name = string("op_5158_transpose_y_0"), val = bool(true)]; + tensor var_5158 = matmul(transpose_x = var_5158_transpose_x_0, transpose_y = var_5158_transpose_y_0, x = query_states_35_cast_fp16, y = x_69_cast_fp16)[name = string("op_5158")]; + fp16 var_5159_to_fp16 = const()[name = string("op_5159_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_17_cast_fp16 = mul(x = var_5158, y = var_5159_to_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor attn_weights_19_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = mask_slice_1)[name = string("attn_weights_19_cast_fp16")]; + int32 var_5194 = const()[name = string("op_5194"), val = int32(-1)]; + tensor var_5196_cast_fp16 = softmax(axis = var_5194, x = attn_weights_19_cast_fp16)[name = string("op_5196_cast_fp16")]; + tensor concat_64 = const()[name = string("concat_64"), val = tensor([4, 64, 512])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_64, x = var_5196_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([4, 512, 256])]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_65, x = x_75_cast_fp16)[name = string("reshape_13_cast_fp16")]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; + tensor concat_69 = const()[name = string("concat_69"), val = tensor([1, 4, 64, 256])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_69, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor var_5208_perm_0 = const()[name = string("op_5208_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5227 = const()[name = string("op_5227"), val = tensor([1, 64, 1024])]; + tensor var_5208_cast_fp16 = transpose(perm = var_5208_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_195")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_5227, x = var_5208_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_5232 = const()[name = string("op_5232"), val = tensor([0, 2, 1])]; + string var_5248_pad_type_0 = const()[name = string("op_5248_pad_type_0"), val = string("valid")]; + int32 var_5248_groups_0 = const()[name = string("op_5248_groups_0"), val = int32(1)]; + tensor var_5248_strides_0 = const()[name = string("op_5248_strides_0"), val = tensor([1])]; + tensor var_5248_pad_0 = const()[name = string("op_5248_pad_0"), val = tensor([0, 0])]; + tensor var_5248_dilations_0 = const()[name = string("op_5248_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457040896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457925696))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5233_cast_fp16 = transpose(perm = var_5232, x = attn_output_45_cast_fp16)[name = string("transpose_194")]; + tensor var_5248_cast_fp16 = conv(dilations = var_5248_dilations_0, groups = var_5248_groups_0, pad = var_5248_pad_0, pad_type = var_5248_pad_type_0, strides = var_5248_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_5233_cast_fp16)[name = string("op_5248_cast_fp16")]; + tensor var_5252 = const()[name = string("op_5252"), val = tensor([0, 2, 1])]; + int32 var_5263 = const()[name = string("op_5263"), val = int32(-1)]; + fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_77_cast_fp16 = transpose(perm = var_5252, x = var_5248_cast_fp16)[name = string("transpose_193")]; + tensor var_5265_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = const_198_promoted_to_fp16)[name = string("op_5265_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_5263, interleave = input_91_interleave_0, values = (hidden_states_77_cast_fp16, var_5265_cast_fp16))[name = string("input_91_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_5260_to_fp16 = const()[name = string("op_5260_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_5260_to_fp16, x = input_91_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; + tensor var_5279_to_fp16 = const()[name = string("op_5279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457962624)))]; + tensor attn_output_49_cast_fp16 = mul(x = normed_111_cast_fp16, y = var_5279_to_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + int32 var_5292 = const()[name = string("op_5292"), val = int32(-1)]; + fp16 const_202_promoted_to_fp16 = const()[name = string("const_202_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5294_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = const_202_promoted_to_fp16)[name = string("op_5294_cast_fp16")]; + bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; + tensor input_93_cast_fp16 = concat(axis = var_5292, interleave = input_93_interleave_0, values = (hidden_states_79_cast_fp16, var_5294_cast_fp16))[name = string("input_93_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_5289_to_fp16 = const()[name = string("op_5289_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_5289_to_fp16, x = input_93_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; + tensor var_5308_to_fp16 = const()[name = string("op_5308_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457964992)))]; + tensor x_77_cast_fp16 = mul(x = normed_115_cast_fp16, y = var_5308_to_fp16)[name = string("x_77_cast_fp16")]; + tensor var_5320 = const()[name = string("op_5320"), val = tensor([0, 2, 1])]; + tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; + tensor var_5321_cast_fp16 = transpose(perm = var_5320, x = x_77_cast_fp16)[name = string("transpose_192")]; + tensor input_95_cast_fp16 = expand_dims(axes = input_95_axes_0, x = var_5321_cast_fp16)[name = string("input_95_cast_fp16")]; + string x_79_pad_type_0 = const()[name = string("x_79_pad_type_0"), val = string("valid")]; + tensor x_79_strides_0 = const()[name = string("x_79_strides_0"), val = tensor([1, 1])]; + tensor x_79_pad_0 = const()[name = string("x_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_79_dilations_0 = const()[name = string("x_79_dilations_0"), val = tensor([1, 1])]; + int32 x_79_groups_0 = const()[name = string("x_79_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457967360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463939392))))[name = string("model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_79_cast_fp16 = conv(dilations = x_79_dilations_0, groups = x_79_groups_0, pad = x_79_pad_0, pad_type = x_79_pad_type_0, strides = x_79_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("x_79_cast_fp16")]; + string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; + tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; + tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; + int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464160640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470132672))))[name = string("model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_9_cast_fp16 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("b_9_cast_fp16")]; + string var_5346_mode_0 = const()[name = string("op_5346_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_5346_cast_fp16 = gelu(mode = var_5346_mode_0, x = x_79_cast_fp16)[name = string("op_5346_cast_fp16")]; + tensor input_97_cast_fp16 = mul(x = var_5346_cast_fp16, y = b_9_cast_fp16)[name = string("input_97_cast_fp16")]; + string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; + tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; + tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; + int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; + tensor model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470353920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476325952))))[name = string("model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_9_cast_fp16 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_97_cast_fp16)[name = string("e_9_cast_fp16")]; + tensor var_5354_axes_0 = const()[name = string("op_5354_axes_0"), val = tensor([2])]; + tensor var_5354_cast_fp16 = squeeze(axes = var_5354_axes_0, x = e_9_cast_fp16)[name = string("op_5354_cast_fp16")]; + tensor var_5355 = const()[name = string("op_5355"), val = tensor([0, 2, 1])]; + int32 var_5366 = const()[name = string("op_5366"), val = int32(-1)]; + fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_81_cast_fp16 = transpose(perm = var_5355, x = var_5354_cast_fp16)[name = string("transpose_191")]; + tensor var_5368_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_5368_cast_fp16")]; + bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; + tensor input_99_cast_fp16 = concat(axis = var_5366, interleave = input_99_interleave_0, values = (hidden_states_81_cast_fp16, var_5368_cast_fp16))[name = string("input_99_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_5363_to_fp16 = const()[name = string("op_5363_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_5363_to_fp16, x = input_99_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_119_cast_fp16 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119_cast_fp16")]; + tensor var_5382_to_fp16 = const()[name = string("op_5382_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476362880)))]; + tensor hidden_states_83_cast_fp16 = mul(x = normed_119_cast_fp16, y = var_5382_to_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor hidden_states_85_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; + int32 var_5399_axis_0 = const()[name = string("op_5399_axis_0"), val = int32(1)]; + int32 var_5399_batch_dims_0 = const()[name = string("op_5399_batch_dims_0"), val = int32(0)]; + bool var_5399_validate_indices_0 = const()[name = string("op_5399_validate_indices_0"), val = bool(false)]; + tensor var_5391_to_fp16 = const()[name = string("op_5391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480559616)))]; + tensor var_5399_cast_fp16_cast_uint16 = gather(axis = var_5399_axis_0, batch_dims = var_5399_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_5399_validate_indices_0, x = var_5391_to_fp16)[name = string("op_5399_cast_fp16_cast_uint16")]; + tensor var_5403 = const()[name = string("op_5403"), val = tensor([1, 64, 1, 256])]; + tensor cos_31_cast_fp16 = reshape(shape = var_5403, x = var_5399_cast_fp16_cast_uint16)[name = string("cos_31_cast_fp16")]; + int32 var_5413_axis_0 = const()[name = string("op_5413_axis_0"), val = int32(1)]; + int32 var_5413_batch_dims_0 = const()[name = string("op_5413_batch_dims_0"), val = int32(0)]; + bool var_5413_validate_indices_0 = const()[name = string("op_5413_validate_indices_0"), val = bool(false)]; + tensor var_5405_to_fp16 = const()[name = string("op_5405_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476365248)))]; + tensor var_5413_cast_fp16_cast_uint16 = gather(axis = var_5413_axis_0, batch_dims = var_5413_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_5413_validate_indices_0, x = var_5405_to_fp16)[name = string("op_5413_cast_fp16_cast_uint16")]; + tensor var_5417 = const()[name = string("op_5417"), val = tensor([1, 64, 1, 256])]; + tensor sin_31_cast_fp16 = reshape(shape = var_5417, x = var_5413_cast_fp16_cast_uint16)[name = string("sin_31_cast_fp16")]; + int32 var_5438 = const()[name = string("op_5438"), val = int32(-1)]; + fp16 const_211_promoted_to_fp16 = const()[name = string("const_211_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5440_cast_fp16 = mul(x = hidden_states_85_cast_fp16, y = const_211_promoted_to_fp16)[name = string("op_5440_cast_fp16")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101_cast_fp16 = concat(axis = var_5438, interleave = input_101_interleave_0, values = (hidden_states_85_cast_fp16, var_5440_cast_fp16))[name = string("input_101_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_5435_to_fp16 = const()[name = string("op_5435_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_5435_to_fp16, x = input_101_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_123_cast_fp16 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123_cast_fp16")]; + tensor var_5454_to_fp16 = const()[name = string("op_5454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484753984)))]; + tensor hidden_states_87_cast_fp16 = mul(x = normed_123_cast_fp16, y = var_5454_to_fp16)[name = string("hidden_states_87_cast_fp16")]; + tensor var_5465 = const()[name = string("op_5465"), val = tensor([0, 2, 1])]; + tensor var_5468_axes_0 = const()[name = string("op_5468_axes_0"), val = tensor([2])]; + tensor var_5466_cast_fp16 = transpose(perm = var_5465, x = hidden_states_87_cast_fp16)[name = string("transpose_190")]; + tensor var_5468_cast_fp16 = expand_dims(axes = var_5468_axes_0, x = var_5466_cast_fp16)[name = string("op_5468_cast_fp16")]; + string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; + tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; + tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; + int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; + tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_5468_cast_fp16)[name = string("query_states_41")]; + string key_states_51_pad_type_0 = const()[name = string("key_states_51_pad_type_0"), val = string("valid")]; + tensor key_states_51_strides_0 = const()[name = string("key_states_51_strides_0"), val = tensor([1, 1])]; + tensor key_states_51_pad_0 = const()[name = string("key_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_51_dilations_0 = const()[name = string("key_states_51_dilations_0"), val = tensor([1, 1])]; + int32 key_states_51_groups_0 = const()[name = string("key_states_51_groups_0"), val = int32(1)]; + tensor key_states_51 = conv(dilations = key_states_51_dilations_0, groups = key_states_51_groups_0, pad = key_states_51_pad_0, pad_type = key_states_51_pad_type_0, strides = key_states_51_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_5468_cast_fp16)[name = string("key_states_51")]; + string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; + tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; + tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; + int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; + tensor value_states_41 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_5468_cast_fp16)[name = string("value_states_41")]; + tensor var_5510 = const()[name = string("op_5510"), val = tensor([1, 4, 256, 64])]; + tensor var_5511 = reshape(shape = var_5510, x = query_states_41)[name = string("op_5511")]; + tensor var_5516 = const()[name = string("op_5516"), val = tensor([0, 1, 3, 2])]; + tensor var_5521 = const()[name = string("op_5521"), val = tensor([1, 1, 256, 64])]; + tensor var_5522 = reshape(shape = var_5521, x = key_states_51)[name = string("op_5522")]; + tensor var_5527 = const()[name = string("op_5527"), val = tensor([0, 1, 3, 2])]; + tensor var_5532 = const()[name = string("op_5532"), val = tensor([1, 1, 256, 64])]; + tensor var_5533 = reshape(shape = var_5532, x = value_states_41)[name = string("op_5533")]; + tensor var_5538 = const()[name = string("op_5538"), val = tensor([0, 1, 3, 2])]; + int32 var_5549 = const()[name = string("op_5549"), val = int32(-1)]; + fp16 const_216_promoted = const()[name = string("const_216_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_89 = transpose(perm = var_5516, x = var_5511)[name = string("transpose_189")]; + tensor var_5551 = mul(x = hidden_states_89, y = const_216_promoted)[name = string("op_5551")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105 = concat(axis = var_5549, interleave = input_105_interleave_0, values = (hidden_states_89, var_5551))[name = string("input_105")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_5546_to_fp16 = const()[name = string("op_5546_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_5546_to_fp16, x = input_105)[name = string("normed_125_cast_fp16")]; + tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_127 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127")]; + tensor var_5565_to_fp16 = const()[name = string("op_5565_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484756352)))]; + tensor q_11_cast_fp16 = mul(x = normed_127, y = var_5565_to_fp16)[name = string("q_11_cast_fp16")]; + int32 var_5576 = const()[name = string("op_5576"), val = int32(-1)]; + fp16 const_220_promoted = const()[name = string("const_220_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_91 = transpose(perm = var_5527, x = var_5522)[name = string("transpose_188")]; + tensor var_5578 = mul(x = hidden_states_91, y = const_220_promoted)[name = string("op_5578")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107 = concat(axis = var_5576, interleave = input_107_interleave_0, values = (hidden_states_91, var_5578))[name = string("input_107")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_5573_to_fp16 = const()[name = string("op_5573_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_5573_to_fp16, x = input_107)[name = string("normed_129_cast_fp16")]; + tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_131 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131")]; + tensor var_5592_to_fp16 = const()[name = string("op_5592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484756928)))]; + tensor k_11_cast_fp16 = mul(x = normed_131, y = var_5592_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_5598 = const()[name = string("op_5598"), val = tensor([0, 2, 1, 3])]; + tensor var_5604 = const()[name = string("op_5604"), val = tensor([0, 2, 1, 3])]; + tensor cos_35 = transpose(perm = var_5598, x = cos_31_cast_fp16)[name = string("transpose_187")]; + tensor var_5606_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_35)[name = string("op_5606_cast_fp16")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; + fp16 const_226_promoted_to_fp16 = const()[name = string("const_226_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5627_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_226_promoted_to_fp16)[name = string("op_5627_cast_fp16")]; + int32 var_5629 = const()[name = string("op_5629"), val = int32(-1)]; + bool var_5630_interleave_0 = const()[name = string("op_5630_interleave_0"), val = bool(false)]; + tensor var_5630_cast_fp16 = concat(axis = var_5629, interleave = var_5630_interleave_0, values = (var_5627_cast_fp16, x1_21_cast_fp16))[name = string("op_5630_cast_fp16")]; + tensor sin_35 = transpose(perm = var_5604, x = sin_31_cast_fp16)[name = string("transpose_186")]; + tensor var_5631_cast_fp16 = mul(x = var_5630_cast_fp16, y = sin_35)[name = string("op_5631_cast_fp16")]; + tensor query_states_43_cast_fp16 = add(x = var_5606_cast_fp16, y = var_5631_cast_fp16)[name = string("query_states_43_cast_fp16")]; + tensor var_5634_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_35)[name = string("op_5634_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; + fp16 const_229_promoted_to_fp16 = const()[name = string("const_229_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5655_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_229_promoted_to_fp16)[name = string("op_5655_cast_fp16")]; + int32 var_5657 = const()[name = string("op_5657"), val = int32(-1)]; + bool var_5658_interleave_0 = const()[name = string("op_5658_interleave_0"), val = bool(false)]; + tensor var_5658_cast_fp16 = concat(axis = var_5657, interleave = var_5658_interleave_0, values = (var_5655_cast_fp16, x1_23_cast_fp16))[name = string("op_5658_cast_fp16")]; + tensor var_5659_cast_fp16 = mul(x = var_5658_cast_fp16, y = sin_35)[name = string("op_5659_cast_fp16")]; + tensor key_states_53_cast_fp16 = add(x = var_5634_cast_fp16, y = var_5659_cast_fp16)[name = string("key_states_53_cast_fp16")]; + tensor read_state_1 = read_state(input = model_model_kv_cache_global)[name = string("read_state_1")]; + tensor concat_70 = const()[name = string("concat_70"), val = tensor([0, 0, 0, 0])]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 0, 64, 0])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_1_stride_0, update = key_states_53_cast_fp16, x = read_state_1)[name = string("model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_166_write_state")]; + tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_166")]; + tensor concat_72 = const()[name = string("concat_72"), val = tensor([4, 0, 0, 0])]; + tensor concat_73 = const()[name = string("concat_73"), val = tensor([5, 0, 64, 0])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_43 = transpose(perm = var_5538, x = var_5533)[name = string("transpose_185")]; + tensor model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_72, begin_mask = model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0, end = concat_73, end_mask = model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_2_stride_0, update = value_states_43, x = coreml_update_state_62)[name = string("model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_167_write_state")]; + tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_167")]; + tensor var_5758_begin_0 = const()[name = string("op_5758_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5758_end_0 = const()[name = string("op_5758_end_0"), val = tensor([1, 1, 4096, 256])]; + tensor var_5758_end_mask_0 = const()[name = string("op_5758_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5758_cast_fp16 = slice_by_index(begin = var_5758_begin_0, end = var_5758_end_0, end_mask = var_5758_end_mask_0, x = coreml_update_state_63)[name = string("op_5758_cast_fp16")]; + tensor var_5765_begin_0 = const()[name = string("op_5765_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_5765_end_0 = const()[name = string("op_5765_end_0"), val = tensor([5, 1, 4096, 256])]; + tensor var_5765_end_mask_0 = const()[name = string("op_5765_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5765_cast_fp16 = slice_by_index(begin = var_5765_begin_0, end = var_5765_end_0, end_mask = var_5765_end_mask_0, x = coreml_update_state_63)[name = string("op_5765_cast_fp16")]; + tensor var_5804 = const()[name = string("op_5804"), val = tensor([1, 4, 1, 1])]; + tensor x_85_cast_fp16 = tile(reps = var_5804, x = var_5758_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_5824 = const()[name = string("op_5824"), val = tensor([1, 4, 1, 1])]; + tensor x_91_cast_fp16 = tile(reps = var_5824, x = var_5765_cast_fp16)[name = string("x_91_cast_fp16")]; + bool var_5851_transpose_x_0 = const()[name = string("op_5851_transpose_x_0"), val = bool(false)]; + bool var_5851_transpose_y_0 = const()[name = string("op_5851_transpose_y_0"), val = bool(true)]; + tensor var_5851 = matmul(transpose_x = var_5851_transpose_x_0, transpose_y = var_5851_transpose_y_0, x = query_states_43_cast_fp16, y = x_85_cast_fp16)[name = string("op_5851")]; + fp16 var_5852_to_fp16 = const()[name = string("op_5852_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_21_cast_fp16 = mul(x = var_5851, y = var_5852_to_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor attn_weights_23_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("attn_weights_23_cast_fp16")]; + int32 var_5887 = const()[name = string("op_5887"), val = int32(-1)]; + tensor var_5889_cast_fp16 = softmax(axis = var_5887, x = attn_weights_23_cast_fp16)[name = string("op_5889_cast_fp16")]; + tensor concat_78 = const()[name = string("concat_78"), val = tensor([4, 64, 4096])]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_78, x = var_5889_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor concat_79 = const()[name = string("concat_79"), val = tensor([4, 4096, 256])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_79, x = x_91_cast_fp16)[name = string("reshape_16_cast_fp16")]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; + tensor concat_83 = const()[name = string("concat_83"), val = tensor([1, 4, 64, 256])]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_83, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor var_5901_perm_0 = const()[name = string("op_5901_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5920 = const()[name = string("op_5920"), val = tensor([1, 64, 1024])]; + tensor var_5901_cast_fp16 = transpose(perm = var_5901_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_184")]; + tensor attn_output_55_cast_fp16 = reshape(shape = var_5920, x = var_5901_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_5925 = const()[name = string("op_5925"), val = tensor([0, 2, 1])]; + string var_5941_pad_type_0 = const()[name = string("op_5941_pad_type_0"), val = string("valid")]; + int32 var_5941_groups_0 = const()[name = string("op_5941_groups_0"), val = int32(1)]; + tensor var_5941_strides_0 = const()[name = string("op_5941_strides_0"), val = tensor([1])]; + tensor var_5941_pad_0 = const()[name = string("op_5941_pad_0"), val = tensor([0, 0])]; + tensor var_5941_dilations_0 = const()[name = string("op_5941_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484757504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485642304))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5926_cast_fp16 = transpose(perm = var_5925, x = attn_output_55_cast_fp16)[name = string("transpose_183")]; + tensor var_5941_cast_fp16 = conv(dilations = var_5941_dilations_0, groups = var_5941_groups_0, pad = var_5941_pad_0, pad_type = var_5941_pad_type_0, strides = var_5941_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_5926_cast_fp16)[name = string("op_5941_cast_fp16")]; + tensor var_5945 = const()[name = string("op_5945"), val = tensor([0, 2, 1])]; + int32 var_5956 = const()[name = string("op_5956"), val = int32(-1)]; + fp16 const_241_promoted_to_fp16 = const()[name = string("const_241_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_93_cast_fp16 = transpose(perm = var_5945, x = var_5941_cast_fp16)[name = string("transpose_182")]; + tensor var_5958_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = const_241_promoted_to_fp16)[name = string("op_5958_cast_fp16")]; + bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; + tensor input_111_cast_fp16 = concat(axis = var_5956, interleave = input_111_interleave_0, values = (hidden_states_93_cast_fp16, var_5958_cast_fp16))[name = string("input_111_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_5953_to_fp16 = const()[name = string("op_5953_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5953_to_fp16, x = input_111_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_135_cast_fp16 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135_cast_fp16")]; + tensor var_5972_to_fp16 = const()[name = string("op_5972_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485679232)))]; + tensor attn_output_59_cast_fp16 = mul(x = normed_135_cast_fp16, y = var_5972_to_fp16)[name = string("attn_output_59_cast_fp16")]; + tensor hidden_states_95_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; + int32 var_5985 = const()[name = string("op_5985"), val = int32(-1)]; + fp16 const_245_promoted_to_fp16 = const()[name = string("const_245_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5987_cast_fp16 = mul(x = hidden_states_95_cast_fp16, y = const_245_promoted_to_fp16)[name = string("op_5987_cast_fp16")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113_cast_fp16 = concat(axis = var_5985, interleave = input_113_interleave_0, values = (hidden_states_95_cast_fp16, var_5987_cast_fp16))[name = string("input_113_cast_fp16")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_5982_to_fp16 = const()[name = string("op_5982_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5982_to_fp16, x = input_113_cast_fp16)[name = string("normed_137_cast_fp16")]; + tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_139_cast_fp16 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139_cast_fp16")]; + tensor var_6001_to_fp16 = const()[name = string("op_6001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485681600)))]; + tensor x_93_cast_fp16 = mul(x = normed_139_cast_fp16, y = var_6001_to_fp16)[name = string("x_93_cast_fp16")]; + tensor var_6013 = const()[name = string("op_6013"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_6014_cast_fp16 = transpose(perm = var_6013, x = x_93_cast_fp16)[name = string("transpose_181")]; + tensor input_115_cast_fp16 = expand_dims(axes = input_115_axes_0, x = var_6014_cast_fp16)[name = string("input_115_cast_fp16")]; + string x_95_pad_type_0 = const()[name = string("x_95_pad_type_0"), val = string("valid")]; + tensor x_95_strides_0 = const()[name = string("x_95_strides_0"), val = tensor([1, 1])]; + tensor x_95_pad_0 = const()[name = string("x_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_95_dilations_0 = const()[name = string("x_95_dilations_0"), val = tensor([1, 1])]; + int32 x_95_groups_0 = const()[name = string("x_95_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485683968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491656000))))[name = string("model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_95_cast_fp16 = conv(dilations = x_95_dilations_0, groups = x_95_groups_0, pad = x_95_pad_0, pad_type = x_95_pad_type_0, strides = x_95_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("x_95_cast_fp16")]; + string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; + tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; + tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; + int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491877248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497849280))))[name = string("model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_11_cast_fp16 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("b_11_cast_fp16")]; + string var_6039_mode_0 = const()[name = string("op_6039_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_6039_cast_fp16 = gelu(mode = var_6039_mode_0, x = x_95_cast_fp16)[name = string("op_6039_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = var_6039_cast_fp16, y = b_11_cast_fp16)[name = string("input_117_cast_fp16")]; + string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; + tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; + tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; + int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; + tensor model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498070528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504042560))))[name = string("model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_11_cast_fp16 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("e_11_cast_fp16")]; + tensor var_6047_axes_0 = const()[name = string("op_6047_axes_0"), val = tensor([2])]; + tensor var_6047_cast_fp16 = squeeze(axes = var_6047_axes_0, x = e_11_cast_fp16)[name = string("op_6047_cast_fp16")]; + tensor var_6048 = const()[name = string("op_6048"), val = tensor([0, 2, 1])]; + int32 var_6059 = const()[name = string("op_6059"), val = int32(-1)]; + fp16 const_249_promoted_to_fp16 = const()[name = string("const_249_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_97_cast_fp16 = transpose(perm = var_6048, x = var_6047_cast_fp16)[name = string("transpose_180")]; + tensor var_6061_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = const_249_promoted_to_fp16)[name = string("op_6061_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_6059, interleave = input_119_interleave_0, values = (hidden_states_97_cast_fp16, var_6061_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_6056_to_fp16 = const()[name = string("op_6056_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_6056_to_fp16, x = input_119_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; + tensor var_6075_to_fp16 = const()[name = string("op_6075_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504079488)))]; + tensor hidden_states_99_cast_fp16 = mul(x = normed_143_cast_fp16, y = var_6075_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + int32 var_6129 = const()[name = string("op_6129"), val = int32(-1)]; + fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6131_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_6131_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_6129, interleave = input_121_interleave_0, values = (hidden_states_101_cast_fp16, var_6131_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_6126_to_fp16 = const()[name = string("op_6126_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_6126_to_fp16, x = input_121_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; + tensor var_6145_to_fp16 = const()[name = string("op_6145_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504081856)))]; + tensor hidden_states_103_cast_fp16 = mul(x = normed_147_cast_fp16, y = var_6145_to_fp16)[name = string("hidden_states_103_cast_fp16")]; + tensor var_6156 = const()[name = string("op_6156"), val = tensor([0, 2, 1])]; + tensor var_6159_axes_0 = const()[name = string("op_6159_axes_0"), val = tensor([2])]; + tensor var_6157_cast_fp16 = transpose(perm = var_6156, x = hidden_states_103_cast_fp16)[name = string("transpose_179")]; + tensor var_6159_cast_fp16 = expand_dims(axes = var_6159_axes_0, x = var_6157_cast_fp16)[name = string("op_6159_cast_fp16")]; + string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; + tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; + tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; + int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; + tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_6159_cast_fp16)[name = string("query_states_49")]; + string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; + tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; + tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; + int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; + tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_6159_cast_fp16)[name = string("key_states_61")]; + string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; + tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; + tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; + int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; + tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_6159_cast_fp16)[name = string("value_states_49")]; + tensor var_6201 = const()[name = string("op_6201"), val = tensor([1, 4, 256, 64])]; + tensor var_6202 = reshape(shape = var_6201, x = query_states_49)[name = string("op_6202")]; + tensor var_6207 = const()[name = string("op_6207"), val = tensor([0, 1, 3, 2])]; + tensor var_6212 = const()[name = string("op_6212"), val = tensor([1, 1, 256, 64])]; + tensor var_6213 = reshape(shape = var_6212, x = key_states_61)[name = string("op_6213")]; + tensor var_6218 = const()[name = string("op_6218"), val = tensor([0, 1, 3, 2])]; + tensor var_6223 = const()[name = string("op_6223"), val = tensor([1, 1, 256, 64])]; + tensor var_6224 = reshape(shape = var_6223, x = value_states_49)[name = string("op_6224")]; + tensor var_6229 = const()[name = string("op_6229"), val = tensor([0, 1, 3, 2])]; + int32 var_6240 = const()[name = string("op_6240"), val = int32(-1)]; + fp16 const_259_promoted = const()[name = string("const_259_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_105 = transpose(perm = var_6207, x = var_6202)[name = string("transpose_178")]; + tensor var_6242 = mul(x = hidden_states_105, y = const_259_promoted)[name = string("op_6242")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_6240, interleave = input_125_interleave_0, values = (hidden_states_105, var_6242))[name = string("input_125")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_6237_to_fp16 = const()[name = string("op_6237_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_6237_to_fp16, x = input_125)[name = string("normed_149_cast_fp16")]; + tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; + tensor var_6256_to_fp16 = const()[name = string("op_6256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504084224)))]; + tensor q_13_cast_fp16 = mul(x = normed_151, y = var_6256_to_fp16)[name = string("q_13_cast_fp16")]; + int32 var_6267 = const()[name = string("op_6267"), val = int32(-1)]; + fp16 const_263_promoted = const()[name = string("const_263_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_107 = transpose(perm = var_6218, x = var_6213)[name = string("transpose_177")]; + tensor var_6269 = mul(x = hidden_states_107, y = const_263_promoted)[name = string("op_6269")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127 = concat(axis = var_6267, interleave = input_127_interleave_0, values = (hidden_states_107, var_6269))[name = string("input_127")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_6264_to_fp16 = const()[name = string("op_6264_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_6264_to_fp16, x = input_127)[name = string("normed_153_cast_fp16")]; + tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; + tensor var_6283_to_fp16 = const()[name = string("op_6283_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504084800)))]; + tensor k_13_cast_fp16 = mul(x = normed_155, y = var_6283_to_fp16)[name = string("k_13_cast_fp16")]; + tensor var_6297_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_5)[name = string("op_6297_cast_fp16")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; + fp16 const_269_promoted_to_fp16 = const()[name = string("const_269_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6318_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_269_promoted_to_fp16)[name = string("op_6318_cast_fp16")]; + int32 var_6320 = const()[name = string("op_6320"), val = int32(-1)]; + bool var_6321_interleave_0 = const()[name = string("op_6321_interleave_0"), val = bool(false)]; + tensor var_6321_cast_fp16 = concat(axis = var_6320, interleave = var_6321_interleave_0, values = (var_6318_cast_fp16, x1_25_cast_fp16))[name = string("op_6321_cast_fp16")]; + tensor var_6322_cast_fp16 = mul(x = var_6321_cast_fp16, y = sin_5)[name = string("op_6322_cast_fp16")]; + tensor query_states_51_cast_fp16 = add(x = var_6297_cast_fp16, y = var_6322_cast_fp16)[name = string("query_states_51_cast_fp16")]; + tensor var_6325_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_5)[name = string("op_6325_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; + fp16 const_272_promoted_to_fp16 = const()[name = string("const_272_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6346_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_272_promoted_to_fp16)[name = string("op_6346_cast_fp16")]; + int32 var_6348 = const()[name = string("op_6348"), val = int32(-1)]; + bool var_6349_interleave_0 = const()[name = string("op_6349_interleave_0"), val = bool(false)]; + tensor var_6349_cast_fp16 = concat(axis = var_6348, interleave = var_6349_interleave_0, values = (var_6346_cast_fp16, x1_27_cast_fp16))[name = string("op_6349_cast_fp16")]; + tensor var_6350_cast_fp16 = mul(x = var_6349_cast_fp16, y = sin_5)[name = string("op_6350_cast_fp16")]; + tensor key_states_63_cast_fp16 = add(x = var_6325_cast_fp16, y = var_6350_cast_fp16)[name = string("key_states_63_cast_fp16")]; + tensor key_slice_11_begin_0 = const()[name = string("key_slice_11_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor key_slice_11_end_0 = const()[name = string("key_slice_11_end_0"), val = tensor([6, 1, 512, 256])]; + tensor key_slice_11_end_mask_0 = const()[name = string("key_slice_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_11_cast_fp16 = slice_by_index(begin = key_slice_11_begin_0, end = key_slice_11_end_0, end_mask = key_slice_11_end_mask_0, x = coreml_update_state_61)[name = string("key_slice_11_cast_fp16")]; + tensor var_6387_begin_0 = const()[name = string("op_6387_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_6387_end_0 = const()[name = string("op_6387_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_6387_end_mask_0 = const()[name = string("op_6387_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6387_cast_fp16 = slice_by_index(begin = var_6387_begin_0, end = var_6387_end_0, end_mask = var_6387_end_mask_0, x = key_slice_11_cast_fp16)[name = string("op_6387_cast_fp16")]; + int32 var_6414 = const()[name = string("op_6414"), val = int32(2)]; + bool shifted_key_11_interleave_0 = const()[name = string("shifted_key_11_interleave_0"), val = bool(false)]; + tensor shifted_key_11_cast_fp16 = concat(axis = var_6414, interleave = shifted_key_11_interleave_0, values = (var_6387_cast_fp16, key_states_63_cast_fp16))[name = string("shifted_key_11_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([5, 0, 0, 0])]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([6, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_84, begin_mask = model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0, end = concat_85, end_mask = model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_11_stride_0, update = shifted_key_11_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_168_write_state")]; + tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_168")]; + tensor value_slice_11_begin_0 = const()[name = string("value_slice_11_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor value_slice_11_end_0 = const()[name = string("value_slice_11_end_0"), val = tensor([28, 1, 512, 256])]; + tensor value_slice_11_end_mask_0 = const()[name = string("value_slice_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_11_cast_fp16 = slice_by_index(begin = value_slice_11_begin_0, end = value_slice_11_end_0, end_mask = value_slice_11_end_mask_0, x = coreml_update_state_64)[name = string("value_slice_11_cast_fp16")]; + tensor var_6457_begin_0 = const()[name = string("op_6457_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_6457_end_0 = const()[name = string("op_6457_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_6457_end_mask_0 = const()[name = string("op_6457_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6457_cast_fp16 = slice_by_index(begin = var_6457_begin_0, end = var_6457_end_0, end_mask = var_6457_end_mask_0, x = value_slice_11_cast_fp16)[name = string("op_6457_cast_fp16")]; + int32 var_6484 = const()[name = string("op_6484"), val = int32(2)]; + bool shifted_value_11_interleave_0 = const()[name = string("shifted_value_11_interleave_0"), val = bool(false)]; + tensor value_states_51 = transpose(perm = var_6229, x = var_6224)[name = string("transpose_176")]; + tensor shifted_value_11_cast_fp16 = concat(axis = var_6484, interleave = shifted_value_11_interleave_0, values = (var_6457_cast_fp16, value_states_51))[name = string("shifted_value_11_cast_fp16")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([27, 0, 0, 0])]; + tensor concat_87 = const()[name = string("concat_87"), val = tensor([28, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_12_stride_0, update = shifted_value_11_cast_fp16, x = coreml_update_state_64)[name = string("model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_169_write_state")]; + tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_169")]; + tensor var_6512_begin_0 = const()[name = string("op_6512_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_6512_end_0 = const()[name = string("op_6512_end_0"), val = tensor([6, 1, 512, 256])]; + tensor var_6512_end_mask_0 = const()[name = string("op_6512_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6512_cast_fp16 = slice_by_index(begin = var_6512_begin_0, end = var_6512_end_0, end_mask = var_6512_end_mask_0, x = coreml_update_state_65)[name = string("op_6512_cast_fp16")]; + tensor var_6519_begin_0 = const()[name = string("op_6519_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor var_6519_end_0 = const()[name = string("op_6519_end_0"), val = tensor([28, 1, 512, 256])]; + tensor var_6519_end_mask_0 = const()[name = string("op_6519_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6519_cast_fp16 = slice_by_index(begin = var_6519_begin_0, end = var_6519_end_0, end_mask = var_6519_end_mask_0, x = coreml_update_state_65)[name = string("op_6519_cast_fp16")]; + tensor var_6558 = const()[name = string("op_6558"), val = tensor([1, 4, 1, 1])]; + tensor x_101_cast_fp16 = tile(reps = var_6558, x = var_6512_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_6578 = const()[name = string("op_6578"), val = tensor([1, 4, 1, 1])]; + tensor x_107_cast_fp16 = tile(reps = var_6578, x = var_6519_cast_fp16)[name = string("x_107_cast_fp16")]; + bool var_6605_transpose_x_0 = const()[name = string("op_6605_transpose_x_0"), val = bool(false)]; + bool var_6605_transpose_y_0 = const()[name = string("op_6605_transpose_y_0"), val = bool(true)]; + tensor var_6605 = matmul(transpose_x = var_6605_transpose_x_0, transpose_y = var_6605_transpose_y_0, x = query_states_51_cast_fp16, y = x_101_cast_fp16)[name = string("op_6605")]; + fp16 var_6606_to_fp16 = const()[name = string("op_6606_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_6605, y = var_6606_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = mask_slice_1)[name = string("attn_weights_27_cast_fp16")]; + int32 var_6641 = const()[name = string("op_6641"), val = int32(-1)]; + tensor var_6643_cast_fp16 = softmax(axis = var_6641, x = attn_weights_27_cast_fp16)[name = string("op_6643_cast_fp16")]; + tensor concat_92 = const()[name = string("concat_92"), val = tensor([4, 64, 512])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_92, x = var_6643_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor concat_93 = const()[name = string("concat_93"), val = tensor([4, 512, 256])]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_93, x = x_107_cast_fp16)[name = string("reshape_19_cast_fp16")]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; + tensor concat_97 = const()[name = string("concat_97"), val = tensor([1, 4, 64, 256])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_97, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor var_6655_perm_0 = const()[name = string("op_6655_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6674 = const()[name = string("op_6674"), val = tensor([1, 64, 1024])]; + tensor var_6655_cast_fp16 = transpose(perm = var_6655_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_175")]; + tensor attn_output_65_cast_fp16 = reshape(shape = var_6674, x = var_6655_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_6679 = const()[name = string("op_6679"), val = tensor([0, 2, 1])]; + string var_6695_pad_type_0 = const()[name = string("op_6695_pad_type_0"), val = string("valid")]; + int32 var_6695_groups_0 = const()[name = string("op_6695_groups_0"), val = int32(1)]; + tensor var_6695_strides_0 = const()[name = string("op_6695_strides_0"), val = tensor([1])]; + tensor var_6695_pad_0 = const()[name = string("op_6695_pad_0"), val = tensor([0, 0])]; + tensor var_6695_dilations_0 = const()[name = string("op_6695_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504085376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504970176))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6680_cast_fp16 = transpose(perm = var_6679, x = attn_output_65_cast_fp16)[name = string("transpose_174")]; + tensor var_6695_cast_fp16 = conv(dilations = var_6695_dilations_0, groups = var_6695_groups_0, pad = var_6695_pad_0, pad_type = var_6695_pad_type_0, strides = var_6695_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_6680_cast_fp16)[name = string("op_6695_cast_fp16")]; + tensor var_6699 = const()[name = string("op_6699"), val = tensor([0, 2, 1])]; + int32 var_6710 = const()[name = string("op_6710"), val = int32(-1)]; + fp16 const_283_promoted_to_fp16 = const()[name = string("const_283_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_109_cast_fp16 = transpose(perm = var_6699, x = var_6695_cast_fp16)[name = string("transpose_173")]; + tensor var_6712_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_283_promoted_to_fp16)[name = string("op_6712_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_6710, interleave = input_131_interleave_0, values = (hidden_states_109_cast_fp16, var_6712_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_6707_to_fp16 = const()[name = string("op_6707_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_6707_to_fp16, x = input_131_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; + tensor var_6726_to_fp16 = const()[name = string("op_6726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505007104)))]; + tensor attn_output_69_cast_fp16 = mul(x = normed_159_cast_fp16, y = var_6726_to_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + int32 var_6739 = const()[name = string("op_6739"), val = int32(-1)]; + fp16 const_287_promoted_to_fp16 = const()[name = string("const_287_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6741_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_287_promoted_to_fp16)[name = string("op_6741_cast_fp16")]; + bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; + tensor input_133_cast_fp16 = concat(axis = var_6739, interleave = input_133_interleave_0, values = (hidden_states_111_cast_fp16, var_6741_cast_fp16))[name = string("input_133_cast_fp16")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_6736_to_fp16 = const()[name = string("op_6736_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_6736_to_fp16, x = input_133_cast_fp16)[name = string("normed_161_cast_fp16")]; + tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; + tensor var_6755_to_fp16 = const()[name = string("op_6755_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505009472)))]; + tensor x_109_cast_fp16 = mul(x = normed_163_cast_fp16, y = var_6755_to_fp16)[name = string("x_109_cast_fp16")]; + tensor var_6767 = const()[name = string("op_6767"), val = tensor([0, 2, 1])]; + tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; + tensor var_6768_cast_fp16 = transpose(perm = var_6767, x = x_109_cast_fp16)[name = string("transpose_172")]; + tensor input_135_cast_fp16 = expand_dims(axes = input_135_axes_0, x = var_6768_cast_fp16)[name = string("input_135_cast_fp16")]; + string x_111_pad_type_0 = const()[name = string("x_111_pad_type_0"), val = string("valid")]; + tensor x_111_strides_0 = const()[name = string("x_111_strides_0"), val = tensor([1, 1])]; + tensor x_111_pad_0 = const()[name = string("x_111_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_111_dilations_0 = const()[name = string("x_111_dilations_0"), val = tensor([1, 1])]; + int32 x_111_groups_0 = const()[name = string("x_111_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505011840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510983872))))[name = string("model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_111_cast_fp16 = conv(dilations = x_111_dilations_0, groups = x_111_groups_0, pad = x_111_pad_0, pad_type = x_111_pad_type_0, strides = x_111_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("x_111_cast_fp16")]; + string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; + tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; + tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; + int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511205120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517177152))))[name = string("model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_13_cast_fp16 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("b_13_cast_fp16")]; + string var_6793_mode_0 = const()[name = string("op_6793_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_6793_cast_fp16 = gelu(mode = var_6793_mode_0, x = x_111_cast_fp16)[name = string("op_6793_cast_fp16")]; + tensor input_137_cast_fp16 = mul(x = var_6793_cast_fp16, y = b_13_cast_fp16)[name = string("input_137_cast_fp16")]; + string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; + tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; + tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; + int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; + tensor model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523370432))))[name = string("model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_13_cast_fp16 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_137_cast_fp16)[name = string("e_13_cast_fp16")]; + tensor var_6801_axes_0 = const()[name = string("op_6801_axes_0"), val = tensor([2])]; + tensor var_6801_cast_fp16 = squeeze(axes = var_6801_axes_0, x = e_13_cast_fp16)[name = string("op_6801_cast_fp16")]; + tensor var_6802 = const()[name = string("op_6802"), val = tensor([0, 2, 1])]; + int32 var_6813 = const()[name = string("op_6813"), val = int32(-1)]; + fp16 const_291_promoted_to_fp16 = const()[name = string("const_291_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_113_cast_fp16 = transpose(perm = var_6802, x = var_6801_cast_fp16)[name = string("transpose_171")]; + tensor var_6815_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = const_291_promoted_to_fp16)[name = string("op_6815_cast_fp16")]; + bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; + tensor input_139_cast_fp16 = concat(axis = var_6813, interleave = input_139_interleave_0, values = (hidden_states_113_cast_fp16, var_6815_cast_fp16))[name = string("input_139_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_6810_to_fp16 = const()[name = string("op_6810_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_6810_to_fp16, x = input_139_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_167_cast_fp16 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167_cast_fp16")]; + tensor var_6829_to_fp16 = const()[name = string("op_6829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523407360)))]; + tensor hidden_states_115_cast_fp16 = mul(x = normed_167_cast_fp16, y = var_6829_to_fp16)[name = string("hidden_states_115_cast_fp16")]; + tensor hidden_states_117_cast_fp16 = add(x = hidden_states_111_cast_fp16, y = hidden_states_115_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; + int32 var_6883 = const()[name = string("op_6883"), val = int32(-1)]; + fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6885_cast_fp16 = mul(x = hidden_states_117_cast_fp16, y = const_296_promoted_to_fp16)[name = string("op_6885_cast_fp16")]; + bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; + tensor input_141_cast_fp16 = concat(axis = var_6883, interleave = input_141_interleave_0, values = (hidden_states_117_cast_fp16, var_6885_cast_fp16))[name = string("input_141_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_6880_to_fp16 = const()[name = string("op_6880_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_6880_to_fp16, x = input_141_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_171_cast_fp16 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171_cast_fp16")]; + tensor var_6899_to_fp16 = const()[name = string("op_6899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523409728)))]; + tensor hidden_states_119_cast_fp16 = mul(x = normed_171_cast_fp16, y = var_6899_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor var_6910 = const()[name = string("op_6910"), val = tensor([0, 2, 1])]; + tensor var_6913_axes_0 = const()[name = string("op_6913_axes_0"), val = tensor([2])]; + tensor var_6911_cast_fp16 = transpose(perm = var_6910, x = hidden_states_119_cast_fp16)[name = string("transpose_170")]; + tensor var_6913_cast_fp16 = expand_dims(axes = var_6913_axes_0, x = var_6911_cast_fp16)[name = string("op_6913_cast_fp16")]; + string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; + tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; + tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; + int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; + tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_6913_cast_fp16)[name = string("query_states_57")]; + string key_states_71_pad_type_0 = const()[name = string("key_states_71_pad_type_0"), val = string("valid")]; + tensor key_states_71_strides_0 = const()[name = string("key_states_71_strides_0"), val = tensor([1, 1])]; + tensor key_states_71_pad_0 = const()[name = string("key_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_71_dilations_0 = const()[name = string("key_states_71_dilations_0"), val = tensor([1, 1])]; + int32 key_states_71_groups_0 = const()[name = string("key_states_71_groups_0"), val = int32(1)]; + tensor key_states_71 = conv(dilations = key_states_71_dilations_0, groups = key_states_71_groups_0, pad = key_states_71_pad_0, pad_type = key_states_71_pad_type_0, strides = key_states_71_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_6913_cast_fp16)[name = string("key_states_71")]; + string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; + tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; + tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; + int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; + tensor value_states_57 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_6913_cast_fp16)[name = string("value_states_57")]; + tensor var_6955 = const()[name = string("op_6955"), val = tensor([1, 4, 256, 64])]; + tensor var_6956 = reshape(shape = var_6955, x = query_states_57)[name = string("op_6956")]; + tensor var_6961 = const()[name = string("op_6961"), val = tensor([0, 1, 3, 2])]; + tensor var_6966 = const()[name = string("op_6966"), val = tensor([1, 1, 256, 64])]; + tensor var_6967 = reshape(shape = var_6966, x = key_states_71)[name = string("op_6967")]; + tensor var_6972 = const()[name = string("op_6972"), val = tensor([0, 1, 3, 2])]; + tensor var_6977 = const()[name = string("op_6977"), val = tensor([1, 1, 256, 64])]; + tensor var_6978 = reshape(shape = var_6977, x = value_states_57)[name = string("op_6978")]; + tensor var_6983 = const()[name = string("op_6983"), val = tensor([0, 1, 3, 2])]; + int32 var_6994 = const()[name = string("op_6994"), val = int32(-1)]; + fp16 const_301_promoted = const()[name = string("const_301_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_121 = transpose(perm = var_6961, x = var_6956)[name = string("transpose_169")]; + tensor var_6996 = mul(x = hidden_states_121, y = const_301_promoted)[name = string("op_6996")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145 = concat(axis = var_6994, interleave = input_145_interleave_0, values = (hidden_states_121, var_6996))[name = string("input_145")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_6991_to_fp16 = const()[name = string("op_6991_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6991_to_fp16, x = input_145)[name = string("normed_173_cast_fp16")]; + tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_175 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175")]; + tensor var_7010_to_fp16 = const()[name = string("op_7010_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523412096)))]; + tensor q_15_cast_fp16 = mul(x = normed_175, y = var_7010_to_fp16)[name = string("q_15_cast_fp16")]; + int32 var_7021 = const()[name = string("op_7021"), val = int32(-1)]; + fp16 const_305_promoted = const()[name = string("const_305_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_123 = transpose(perm = var_6972, x = var_6967)[name = string("transpose_168")]; + tensor var_7023 = mul(x = hidden_states_123, y = const_305_promoted)[name = string("op_7023")]; + bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; + tensor input_147 = concat(axis = var_7021, interleave = input_147_interleave_0, values = (hidden_states_123, var_7023))[name = string("input_147")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_7018_to_fp16 = const()[name = string("op_7018_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_7018_to_fp16, x = input_147)[name = string("normed_177_cast_fp16")]; + tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_179 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179")]; + tensor var_7037_to_fp16 = const()[name = string("op_7037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523412672)))]; + tensor k_15_cast_fp16 = mul(x = normed_179, y = var_7037_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_7051_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_5)[name = string("op_7051_cast_fp16")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; + fp16 const_311_promoted_to_fp16 = const()[name = string("const_311_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7072_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_311_promoted_to_fp16)[name = string("op_7072_cast_fp16")]; + int32 var_7074 = const()[name = string("op_7074"), val = int32(-1)]; + bool var_7075_interleave_0 = const()[name = string("op_7075_interleave_0"), val = bool(false)]; + tensor var_7075_cast_fp16 = concat(axis = var_7074, interleave = var_7075_interleave_0, values = (var_7072_cast_fp16, x1_29_cast_fp16))[name = string("op_7075_cast_fp16")]; + tensor var_7076_cast_fp16 = mul(x = var_7075_cast_fp16, y = sin_5)[name = string("op_7076_cast_fp16")]; + tensor query_states_59_cast_fp16 = add(x = var_7051_cast_fp16, y = var_7076_cast_fp16)[name = string("query_states_59_cast_fp16")]; + tensor var_7079_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_5)[name = string("op_7079_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; + fp16 const_314_promoted_to_fp16 = const()[name = string("const_314_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7100_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_314_promoted_to_fp16)[name = string("op_7100_cast_fp16")]; + int32 var_7102 = const()[name = string("op_7102"), val = int32(-1)]; + bool var_7103_interleave_0 = const()[name = string("op_7103_interleave_0"), val = bool(false)]; + tensor var_7103_cast_fp16 = concat(axis = var_7102, interleave = var_7103_interleave_0, values = (var_7100_cast_fp16, x1_31_cast_fp16))[name = string("op_7103_cast_fp16")]; + tensor var_7104_cast_fp16 = mul(x = var_7103_cast_fp16, y = sin_5)[name = string("op_7104_cast_fp16")]; + tensor key_states_73_cast_fp16 = add(x = var_7079_cast_fp16, y = var_7104_cast_fp16)[name = string("key_states_73_cast_fp16")]; + tensor key_slice_13_begin_0 = const()[name = string("key_slice_13_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor key_slice_13_end_0 = const()[name = string("key_slice_13_end_0"), val = tensor([7, 1, 512, 256])]; + tensor key_slice_13_end_mask_0 = const()[name = string("key_slice_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_13_cast_fp16 = slice_by_index(begin = key_slice_13_begin_0, end = key_slice_13_end_0, end_mask = key_slice_13_end_mask_0, x = coreml_update_state_65)[name = string("key_slice_13_cast_fp16")]; + tensor var_7141_begin_0 = const()[name = string("op_7141_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_7141_end_0 = const()[name = string("op_7141_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_7141_end_mask_0 = const()[name = string("op_7141_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7141_cast_fp16 = slice_by_index(begin = var_7141_begin_0, end = var_7141_end_0, end_mask = var_7141_end_mask_0, x = key_slice_13_cast_fp16)[name = string("op_7141_cast_fp16")]; + int32 var_7168 = const()[name = string("op_7168"), val = int32(2)]; + bool shifted_key_13_interleave_0 = const()[name = string("shifted_key_13_interleave_0"), val = bool(false)]; + tensor shifted_key_13_cast_fp16 = concat(axis = var_7168, interleave = shifted_key_13_interleave_0, values = (var_7141_cast_fp16, key_states_73_cast_fp16))[name = string("shifted_key_13_cast_fp16")]; + tensor concat_98 = const()[name = string("concat_98"), val = tensor([6, 0, 0, 0])]; + tensor concat_99 = const()[name = string("concat_99"), val = tensor([7, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_13_stride_0, update = shifted_key_13_cast_fp16, x = coreml_update_state_65)[name = string("model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_170_write_state")]; + tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_170")]; + tensor value_slice_13_begin_0 = const()[name = string("value_slice_13_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor value_slice_13_end_0 = const()[name = string("value_slice_13_end_0"), val = tensor([29, 1, 512, 256])]; + tensor value_slice_13_end_mask_0 = const()[name = string("value_slice_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_13_cast_fp16 = slice_by_index(begin = value_slice_13_begin_0, end = value_slice_13_end_0, end_mask = value_slice_13_end_mask_0, x = coreml_update_state_66)[name = string("value_slice_13_cast_fp16")]; + tensor var_7211_begin_0 = const()[name = string("op_7211_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_7211_end_0 = const()[name = string("op_7211_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_7211_end_mask_0 = const()[name = string("op_7211_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7211_cast_fp16 = slice_by_index(begin = var_7211_begin_0, end = var_7211_end_0, end_mask = var_7211_end_mask_0, x = value_slice_13_cast_fp16)[name = string("op_7211_cast_fp16")]; + int32 var_7238 = const()[name = string("op_7238"), val = int32(2)]; + bool shifted_value_13_interleave_0 = const()[name = string("shifted_value_13_interleave_0"), val = bool(false)]; + tensor value_states_59 = transpose(perm = var_6983, x = var_6978)[name = string("transpose_167")]; + tensor shifted_value_13_cast_fp16 = concat(axis = var_7238, interleave = shifted_value_13_interleave_0, values = (var_7211_cast_fp16, value_states_59))[name = string("shifted_value_13_cast_fp16")]; + tensor concat_100 = const()[name = string("concat_100"), val = tensor([28, 0, 0, 0])]; + tensor concat_101 = const()[name = string("concat_101"), val = tensor([29, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_100, begin_mask = model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0, end = concat_101, end_mask = model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_14_stride_0, update = shifted_value_13_cast_fp16, x = coreml_update_state_66)[name = string("model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_171_write_state")]; + tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_171")]; + tensor var_7266_begin_0 = const()[name = string("op_7266_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_7266_end_0 = const()[name = string("op_7266_end_0"), val = tensor([7, 1, 512, 256])]; + tensor var_7266_end_mask_0 = const()[name = string("op_7266_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7266_cast_fp16 = slice_by_index(begin = var_7266_begin_0, end = var_7266_end_0, end_mask = var_7266_end_mask_0, x = coreml_update_state_67)[name = string("op_7266_cast_fp16")]; + tensor var_7273_begin_0 = const()[name = string("op_7273_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_7273_end_0 = const()[name = string("op_7273_end_0"), val = tensor([29, 1, 512, 256])]; + tensor var_7273_end_mask_0 = const()[name = string("op_7273_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7273_cast_fp16 = slice_by_index(begin = var_7273_begin_0, end = var_7273_end_0, end_mask = var_7273_end_mask_0, x = coreml_update_state_67)[name = string("op_7273_cast_fp16")]; + tensor var_7312 = const()[name = string("op_7312"), val = tensor([1, 4, 1, 1])]; + tensor x_117_cast_fp16 = tile(reps = var_7312, x = var_7266_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_7332 = const()[name = string("op_7332"), val = tensor([1, 4, 1, 1])]; + tensor x_123_cast_fp16 = tile(reps = var_7332, x = var_7273_cast_fp16)[name = string("x_123_cast_fp16")]; + bool var_7359_transpose_x_0 = const()[name = string("op_7359_transpose_x_0"), val = bool(false)]; + bool var_7359_transpose_y_0 = const()[name = string("op_7359_transpose_y_0"), val = bool(true)]; + tensor var_7359 = matmul(transpose_x = var_7359_transpose_x_0, transpose_y = var_7359_transpose_y_0, x = query_states_59_cast_fp16, y = x_117_cast_fp16)[name = string("op_7359")]; + fp16 var_7360_to_fp16 = const()[name = string("op_7360_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_29_cast_fp16 = mul(x = var_7359, y = var_7360_to_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor attn_weights_31_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = mask_slice_1)[name = string("attn_weights_31_cast_fp16")]; + int32 var_7395 = const()[name = string("op_7395"), val = int32(-1)]; + tensor var_7397_cast_fp16 = softmax(axis = var_7395, x = attn_weights_31_cast_fp16)[name = string("op_7397_cast_fp16")]; + tensor concat_106 = const()[name = string("concat_106"), val = tensor([4, 64, 512])]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_106, x = var_7397_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([4, 512, 256])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_107, x = x_123_cast_fp16)[name = string("reshape_22_cast_fp16")]; + bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; + bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; + tensor concat_111 = const()[name = string("concat_111"), val = tensor([1, 4, 64, 256])]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_111, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor var_7409_perm_0 = const()[name = string("op_7409_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7428 = const()[name = string("op_7428"), val = tensor([1, 64, 1024])]; + tensor var_7409_cast_fp16 = transpose(perm = var_7409_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_166")]; + tensor attn_output_75_cast_fp16 = reshape(shape = var_7428, x = var_7409_cast_fp16)[name = string("attn_output_75_cast_fp16")]; + tensor var_7433 = const()[name = string("op_7433"), val = tensor([0, 2, 1])]; + string var_7449_pad_type_0 = const()[name = string("op_7449_pad_type_0"), val = string("valid")]; + int32 var_7449_groups_0 = const()[name = string("op_7449_groups_0"), val = int32(1)]; + tensor var_7449_strides_0 = const()[name = string("op_7449_strides_0"), val = tensor([1])]; + tensor var_7449_pad_0 = const()[name = string("op_7449_pad_0"), val = tensor([0, 0])]; + tensor var_7449_dilations_0 = const()[name = string("op_7449_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523413248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524298048))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7434_cast_fp16 = transpose(perm = var_7433, x = attn_output_75_cast_fp16)[name = string("transpose_165")]; + tensor var_7449_cast_fp16 = conv(dilations = var_7449_dilations_0, groups = var_7449_groups_0, pad = var_7449_pad_0, pad_type = var_7449_pad_type_0, strides = var_7449_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_7434_cast_fp16)[name = string("op_7449_cast_fp16")]; + tensor var_7453 = const()[name = string("op_7453"), val = tensor([0, 2, 1])]; + int32 var_7464 = const()[name = string("op_7464"), val = int32(-1)]; + fp16 const_325_promoted_to_fp16 = const()[name = string("const_325_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_125_cast_fp16 = transpose(perm = var_7453, x = var_7449_cast_fp16)[name = string("transpose_164")]; + tensor var_7466_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = const_325_promoted_to_fp16)[name = string("op_7466_cast_fp16")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151_cast_fp16 = concat(axis = var_7464, interleave = input_151_interleave_0, values = (hidden_states_125_cast_fp16, var_7466_cast_fp16))[name = string("input_151_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_7461_to_fp16 = const()[name = string("op_7461_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_7461_to_fp16, x = input_151_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_183_cast_fp16 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183_cast_fp16")]; + tensor var_7480_to_fp16 = const()[name = string("op_7480_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524334976)))]; + tensor attn_output_79_cast_fp16 = mul(x = normed_183_cast_fp16, y = var_7480_to_fp16)[name = string("attn_output_79_cast_fp16")]; + tensor hidden_states_127_cast_fp16 = add(x = hidden_states_117_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + int32 var_7493 = const()[name = string("op_7493"), val = int32(-1)]; + fp16 const_329_promoted_to_fp16 = const()[name = string("const_329_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7495_cast_fp16 = mul(x = hidden_states_127_cast_fp16, y = const_329_promoted_to_fp16)[name = string("op_7495_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_7493, interleave = input_153_interleave_0, values = (hidden_states_127_cast_fp16, var_7495_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_7490_to_fp16 = const()[name = string("op_7490_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_7490_to_fp16, x = input_153_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_187_cast_fp16 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187_cast_fp16")]; + tensor var_7509_to_fp16 = const()[name = string("op_7509_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524337344)))]; + tensor x_125_cast_fp16 = mul(x = normed_187_cast_fp16, y = var_7509_to_fp16)[name = string("x_125_cast_fp16")]; + tensor var_7521 = const()[name = string("op_7521"), val = tensor([0, 2, 1])]; + tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; + tensor var_7522_cast_fp16 = transpose(perm = var_7521, x = x_125_cast_fp16)[name = string("transpose_163")]; + tensor input_155_cast_fp16 = expand_dims(axes = input_155_axes_0, x = var_7522_cast_fp16)[name = string("input_155_cast_fp16")]; + string x_127_pad_type_0 = const()[name = string("x_127_pad_type_0"), val = string("valid")]; + tensor x_127_strides_0 = const()[name = string("x_127_strides_0"), val = tensor([1, 1])]; + tensor x_127_pad_0 = const()[name = string("x_127_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_127_dilations_0 = const()[name = string("x_127_dilations_0"), val = tensor([1, 1])]; + int32 x_127_groups_0 = const()[name = string("x_127_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524339712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530311744))))[name = string("model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_127_cast_fp16 = conv(dilations = x_127_dilations_0, groups = x_127_groups_0, pad = x_127_pad_0, pad_type = x_127_pad_type_0, strides = x_127_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("x_127_cast_fp16")]; + string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; + tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; + tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; + int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530532992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536505024))))[name = string("model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_15_cast_fp16 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("b_15_cast_fp16")]; + string var_7547_mode_0 = const()[name = string("op_7547_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_7547_cast_fp16 = gelu(mode = var_7547_mode_0, x = x_127_cast_fp16)[name = string("op_7547_cast_fp16")]; + tensor input_157_cast_fp16 = mul(x = var_7547_cast_fp16, y = b_15_cast_fp16)[name = string("input_157_cast_fp16")]; + string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; + tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; + tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; + int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; + tensor model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536726272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542698304))))[name = string("model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_15_cast_fp16 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_157_cast_fp16)[name = string("e_15_cast_fp16")]; + tensor var_7555_axes_0 = const()[name = string("op_7555_axes_0"), val = tensor([2])]; + tensor var_7555_cast_fp16 = squeeze(axes = var_7555_axes_0, x = e_15_cast_fp16)[name = string("op_7555_cast_fp16")]; + tensor var_7556 = const()[name = string("op_7556"), val = tensor([0, 2, 1])]; + int32 var_7567 = const()[name = string("op_7567"), val = int32(-1)]; + fp16 const_333_promoted_to_fp16 = const()[name = string("const_333_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_129_cast_fp16 = transpose(perm = var_7556, x = var_7555_cast_fp16)[name = string("transpose_162")]; + tensor var_7569_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_333_promoted_to_fp16)[name = string("op_7569_cast_fp16")]; + bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; + tensor input_159_cast_fp16 = concat(axis = var_7567, interleave = input_159_interleave_0, values = (hidden_states_129_cast_fp16, var_7569_cast_fp16))[name = string("input_159_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_7564_to_fp16 = const()[name = string("op_7564_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_7564_to_fp16, x = input_159_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; + tensor var_7583_to_fp16 = const()[name = string("op_7583_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542735232)))]; + tensor hidden_states_131_cast_fp16 = mul(x = normed_191_cast_fp16, y = var_7583_to_fp16)[name = string("hidden_states_131_cast_fp16")]; + tensor hidden_states_133_cast_fp16 = add(x = hidden_states_127_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; + int32 var_7637 = const()[name = string("op_7637"), val = int32(-1)]; + fp16 const_338_promoted_to_fp16 = const()[name = string("const_338_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7639_cast_fp16 = mul(x = hidden_states_133_cast_fp16, y = const_338_promoted_to_fp16)[name = string("op_7639_cast_fp16")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161_cast_fp16 = concat(axis = var_7637, interleave = input_161_interleave_0, values = (hidden_states_133_cast_fp16, var_7639_cast_fp16))[name = string("input_161_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_7634_to_fp16 = const()[name = string("op_7634_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_7634_to_fp16, x = input_161_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; + tensor var_7653_to_fp16 = const()[name = string("op_7653_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542737600)))]; + tensor hidden_states_135_cast_fp16 = mul(x = normed_195_cast_fp16, y = var_7653_to_fp16)[name = string("hidden_states_135_cast_fp16")]; + tensor var_7664 = const()[name = string("op_7664"), val = tensor([0, 2, 1])]; + tensor var_7667_axes_0 = const()[name = string("op_7667_axes_0"), val = tensor([2])]; + tensor var_7665_cast_fp16 = transpose(perm = var_7664, x = hidden_states_135_cast_fp16)[name = string("transpose_161")]; + tensor var_7667_cast_fp16 = expand_dims(axes = var_7667_axes_0, x = var_7665_cast_fp16)[name = string("op_7667_cast_fp16")]; + string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; + tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; + tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; + int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; + tensor query_states_65 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_7667_cast_fp16)[name = string("query_states_65")]; + string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; + tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; + tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; + int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; + tensor key_states_81 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_7667_cast_fp16)[name = string("key_states_81")]; + string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; + tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; + tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; + int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; + tensor value_states_65 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_7667_cast_fp16)[name = string("value_states_65")]; + tensor var_7709 = const()[name = string("op_7709"), val = tensor([1, 4, 256, 64])]; + tensor var_7710 = reshape(shape = var_7709, x = query_states_65)[name = string("op_7710")]; + tensor var_7715 = const()[name = string("op_7715"), val = tensor([0, 1, 3, 2])]; + tensor var_7720 = const()[name = string("op_7720"), val = tensor([1, 1, 256, 64])]; + tensor var_7721 = reshape(shape = var_7720, x = key_states_81)[name = string("op_7721")]; + tensor var_7726 = const()[name = string("op_7726"), val = tensor([0, 1, 3, 2])]; + tensor var_7731 = const()[name = string("op_7731"), val = tensor([1, 1, 256, 64])]; + tensor var_7732 = reshape(shape = var_7731, x = value_states_65)[name = string("op_7732")]; + tensor var_7737 = const()[name = string("op_7737"), val = tensor([0, 1, 3, 2])]; + int32 var_7748 = const()[name = string("op_7748"), val = int32(-1)]; + fp16 const_343_promoted = const()[name = string("const_343_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_137 = transpose(perm = var_7715, x = var_7710)[name = string("transpose_160")]; + tensor var_7750 = mul(x = hidden_states_137, y = const_343_promoted)[name = string("op_7750")]; + bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; + tensor input_165 = concat(axis = var_7748, interleave = input_165_interleave_0, values = (hidden_states_137, var_7750))[name = string("input_165")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_7745_to_fp16 = const()[name = string("op_7745_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_7745_to_fp16, x = input_165)[name = string("normed_197_cast_fp16")]; + tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; + tensor var_7764_to_fp16 = const()[name = string("op_7764_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542739968)))]; + tensor q_17_cast_fp16 = mul(x = normed_199, y = var_7764_to_fp16)[name = string("q_17_cast_fp16")]; + int32 var_7775 = const()[name = string("op_7775"), val = int32(-1)]; + fp16 const_347_promoted = const()[name = string("const_347_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_139 = transpose(perm = var_7726, x = var_7721)[name = string("transpose_159")]; + tensor var_7777 = mul(x = hidden_states_139, y = const_347_promoted)[name = string("op_7777")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167 = concat(axis = var_7775, interleave = input_167_interleave_0, values = (hidden_states_139, var_7777))[name = string("input_167")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_7772_to_fp16 = const()[name = string("op_7772_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_7772_to_fp16, x = input_167)[name = string("normed_201_cast_fp16")]; + tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; + tensor var_7791_to_fp16 = const()[name = string("op_7791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542740544)))]; + tensor k_17_cast_fp16 = mul(x = normed_203, y = var_7791_to_fp16)[name = string("k_17_cast_fp16")]; + tensor var_7805_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_5)[name = string("op_7805_cast_fp16")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; + fp16 const_353_promoted_to_fp16 = const()[name = string("const_353_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7826_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_353_promoted_to_fp16)[name = string("op_7826_cast_fp16")]; + int32 var_7828 = const()[name = string("op_7828"), val = int32(-1)]; + bool var_7829_interleave_0 = const()[name = string("op_7829_interleave_0"), val = bool(false)]; + tensor var_7829_cast_fp16 = concat(axis = var_7828, interleave = var_7829_interleave_0, values = (var_7826_cast_fp16, x1_33_cast_fp16))[name = string("op_7829_cast_fp16")]; + tensor var_7830_cast_fp16 = mul(x = var_7829_cast_fp16, y = sin_5)[name = string("op_7830_cast_fp16")]; + tensor query_states_67_cast_fp16 = add(x = var_7805_cast_fp16, y = var_7830_cast_fp16)[name = string("query_states_67_cast_fp16")]; + tensor var_7833_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_5)[name = string("op_7833_cast_fp16")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; + fp16 const_356_promoted_to_fp16 = const()[name = string("const_356_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7854_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_356_promoted_to_fp16)[name = string("op_7854_cast_fp16")]; + int32 var_7856 = const()[name = string("op_7856"), val = int32(-1)]; + bool var_7857_interleave_0 = const()[name = string("op_7857_interleave_0"), val = bool(false)]; + tensor var_7857_cast_fp16 = concat(axis = var_7856, interleave = var_7857_interleave_0, values = (var_7854_cast_fp16, x1_35_cast_fp16))[name = string("op_7857_cast_fp16")]; + tensor var_7858_cast_fp16 = mul(x = var_7857_cast_fp16, y = sin_5)[name = string("op_7858_cast_fp16")]; + tensor key_states_83_cast_fp16 = add(x = var_7833_cast_fp16, y = var_7858_cast_fp16)[name = string("key_states_83_cast_fp16")]; + tensor key_slice_15_begin_0 = const()[name = string("key_slice_15_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor key_slice_15_end_0 = const()[name = string("key_slice_15_end_0"), val = tensor([8, 1, 512, 256])]; + tensor key_slice_15_end_mask_0 = const()[name = string("key_slice_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_15_cast_fp16 = slice_by_index(begin = key_slice_15_begin_0, end = key_slice_15_end_0, end_mask = key_slice_15_end_mask_0, x = coreml_update_state_67)[name = string("key_slice_15_cast_fp16")]; + tensor var_7895_begin_0 = const()[name = string("op_7895_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_7895_end_0 = const()[name = string("op_7895_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_7895_end_mask_0 = const()[name = string("op_7895_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7895_cast_fp16 = slice_by_index(begin = var_7895_begin_0, end = var_7895_end_0, end_mask = var_7895_end_mask_0, x = key_slice_15_cast_fp16)[name = string("op_7895_cast_fp16")]; + int32 var_7922 = const()[name = string("op_7922"), val = int32(2)]; + bool shifted_key_15_interleave_0 = const()[name = string("shifted_key_15_interleave_0"), val = bool(false)]; + tensor shifted_key_15_cast_fp16 = concat(axis = var_7922, interleave = shifted_key_15_interleave_0, values = (var_7895_cast_fp16, key_states_83_cast_fp16))[name = string("shifted_key_15_cast_fp16")]; + tensor concat_112 = const()[name = string("concat_112"), val = tensor([7, 0, 0, 0])]; + tensor concat_113 = const()[name = string("concat_113"), val = tensor([8, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_112, begin_mask = model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0, end = concat_113, end_mask = model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_15_stride_0, update = shifted_key_15_cast_fp16, x = coreml_update_state_67)[name = string("model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_172_write_state")]; + tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_172")]; + tensor value_slice_15_begin_0 = const()[name = string("value_slice_15_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor value_slice_15_end_0 = const()[name = string("value_slice_15_end_0"), val = tensor([30, 1, 512, 256])]; + tensor value_slice_15_end_mask_0 = const()[name = string("value_slice_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_15_cast_fp16 = slice_by_index(begin = value_slice_15_begin_0, end = value_slice_15_end_0, end_mask = value_slice_15_end_mask_0, x = coreml_update_state_68)[name = string("value_slice_15_cast_fp16")]; + tensor var_7965_begin_0 = const()[name = string("op_7965_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_7965_end_0 = const()[name = string("op_7965_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_7965_end_mask_0 = const()[name = string("op_7965_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7965_cast_fp16 = slice_by_index(begin = var_7965_begin_0, end = var_7965_end_0, end_mask = var_7965_end_mask_0, x = value_slice_15_cast_fp16)[name = string("op_7965_cast_fp16")]; + int32 var_7992 = const()[name = string("op_7992"), val = int32(2)]; + bool shifted_value_15_interleave_0 = const()[name = string("shifted_value_15_interleave_0"), val = bool(false)]; + tensor value_states_67 = transpose(perm = var_7737, x = var_7732)[name = string("transpose_158")]; + tensor shifted_value_15_cast_fp16 = concat(axis = var_7992, interleave = shifted_value_15_interleave_0, values = (var_7965_cast_fp16, value_states_67))[name = string("shifted_value_15_cast_fp16")]; + tensor concat_114 = const()[name = string("concat_114"), val = tensor([29, 0, 0, 0])]; + tensor concat_115 = const()[name = string("concat_115"), val = tensor([30, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_16_stride_0, update = shifted_value_15_cast_fp16, x = coreml_update_state_68)[name = string("model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_173_write_state")]; + tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_173")]; + tensor var_8020_begin_0 = const()[name = string("op_8020_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_8020_end_0 = const()[name = string("op_8020_end_0"), val = tensor([8, 1, 512, 256])]; + tensor var_8020_end_mask_0 = const()[name = string("op_8020_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8020_cast_fp16 = slice_by_index(begin = var_8020_begin_0, end = var_8020_end_0, end_mask = var_8020_end_mask_0, x = coreml_update_state_69)[name = string("op_8020_cast_fp16")]; + tensor var_8027_begin_0 = const()[name = string("op_8027_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_8027_end_0 = const()[name = string("op_8027_end_0"), val = tensor([30, 1, 512, 256])]; + tensor var_8027_end_mask_0 = const()[name = string("op_8027_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8027_cast_fp16 = slice_by_index(begin = var_8027_begin_0, end = var_8027_end_0, end_mask = var_8027_end_mask_0, x = coreml_update_state_69)[name = string("op_8027_cast_fp16")]; + tensor var_8066 = const()[name = string("op_8066"), val = tensor([1, 4, 1, 1])]; + tensor x_133_cast_fp16 = tile(reps = var_8066, x = var_8020_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_8086 = const()[name = string("op_8086"), val = tensor([1, 4, 1, 1])]; + tensor x_139_cast_fp16 = tile(reps = var_8086, x = var_8027_cast_fp16)[name = string("x_139_cast_fp16")]; + bool var_8113_transpose_x_0 = const()[name = string("op_8113_transpose_x_0"), val = bool(false)]; + bool var_8113_transpose_y_0 = const()[name = string("op_8113_transpose_y_0"), val = bool(true)]; + tensor var_8113 = matmul(transpose_x = var_8113_transpose_x_0, transpose_y = var_8113_transpose_y_0, x = query_states_67_cast_fp16, y = x_133_cast_fp16)[name = string("op_8113")]; + fp16 var_8114_to_fp16 = const()[name = string("op_8114_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_33_cast_fp16 = mul(x = var_8113, y = var_8114_to_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor attn_weights_35_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = mask_slice_1)[name = string("attn_weights_35_cast_fp16")]; + int32 var_8149 = const()[name = string("op_8149"), val = int32(-1)]; + tensor var_8151_cast_fp16 = softmax(axis = var_8149, x = attn_weights_35_cast_fp16)[name = string("op_8151_cast_fp16")]; + tensor concat_120 = const()[name = string("concat_120"), val = tensor([4, 64, 512])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_120, x = var_8151_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor concat_121 = const()[name = string("concat_121"), val = tensor([4, 512, 256])]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_121, x = x_139_cast_fp16)[name = string("reshape_25_cast_fp16")]; + bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; + bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; + tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 4, 64, 256])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_125, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor var_8163_perm_0 = const()[name = string("op_8163_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8182 = const()[name = string("op_8182"), val = tensor([1, 64, 1024])]; + tensor var_8163_cast_fp16 = transpose(perm = var_8163_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_157")]; + tensor attn_output_85_cast_fp16 = reshape(shape = var_8182, x = var_8163_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_8187 = const()[name = string("op_8187"), val = tensor([0, 2, 1])]; + string var_8203_pad_type_0 = const()[name = string("op_8203_pad_type_0"), val = string("valid")]; + int32 var_8203_groups_0 = const()[name = string("op_8203_groups_0"), val = int32(1)]; + tensor var_8203_strides_0 = const()[name = string("op_8203_strides_0"), val = tensor([1])]; + tensor var_8203_pad_0 = const()[name = string("op_8203_pad_0"), val = tensor([0, 0])]; + tensor var_8203_dilations_0 = const()[name = string("op_8203_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542741120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543625920))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8188_cast_fp16 = transpose(perm = var_8187, x = attn_output_85_cast_fp16)[name = string("transpose_156")]; + tensor var_8203_cast_fp16 = conv(dilations = var_8203_dilations_0, groups = var_8203_groups_0, pad = var_8203_pad_0, pad_type = var_8203_pad_type_0, strides = var_8203_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_8188_cast_fp16)[name = string("op_8203_cast_fp16")]; + tensor var_8207 = const()[name = string("op_8207"), val = tensor([0, 2, 1])]; + int32 var_8218 = const()[name = string("op_8218"), val = int32(-1)]; + fp16 const_367_promoted_to_fp16 = const()[name = string("const_367_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_141_cast_fp16 = transpose(perm = var_8207, x = var_8203_cast_fp16)[name = string("transpose_155")]; + tensor var_8220_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_367_promoted_to_fp16)[name = string("op_8220_cast_fp16")]; + bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; + tensor input_171_cast_fp16 = concat(axis = var_8218, interleave = input_171_interleave_0, values = (hidden_states_141_cast_fp16, var_8220_cast_fp16))[name = string("input_171_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_8215_to_fp16 = const()[name = string("op_8215_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_8215_to_fp16, x = input_171_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; + tensor var_8234_to_fp16 = const()[name = string("op_8234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543662848)))]; + tensor attn_output_89_cast_fp16 = mul(x = normed_207_cast_fp16, y = var_8234_to_fp16)[name = string("attn_output_89_cast_fp16")]; + tensor hidden_states_143_cast_fp16 = add(x = hidden_states_133_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; + int32 var_8247 = const()[name = string("op_8247"), val = int32(-1)]; + fp16 const_371_promoted_to_fp16 = const()[name = string("const_371_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8249_cast_fp16 = mul(x = hidden_states_143_cast_fp16, y = const_371_promoted_to_fp16)[name = string("op_8249_cast_fp16")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173_cast_fp16 = concat(axis = var_8247, interleave = input_173_interleave_0, values = (hidden_states_143_cast_fp16, var_8249_cast_fp16))[name = string("input_173_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_8244_to_fp16 = const()[name = string("op_8244_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_8244_to_fp16, x = input_173_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; + tensor var_8263_to_fp16 = const()[name = string("op_8263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543665216)))]; + tensor x_141_cast_fp16 = mul(x = normed_211_cast_fp16, y = var_8263_to_fp16)[name = string("x_141_cast_fp16")]; + tensor var_8275 = const()[name = string("op_8275"), val = tensor([0, 2, 1])]; + tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; + tensor var_8276_cast_fp16 = transpose(perm = var_8275, x = x_141_cast_fp16)[name = string("transpose_154")]; + tensor input_175_cast_fp16 = expand_dims(axes = input_175_axes_0, x = var_8276_cast_fp16)[name = string("input_175_cast_fp16")]; + string x_143_pad_type_0 = const()[name = string("x_143_pad_type_0"), val = string("valid")]; + tensor x_143_strides_0 = const()[name = string("x_143_strides_0"), val = tensor([1, 1])]; + tensor x_143_pad_0 = const()[name = string("x_143_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_143_dilations_0 = const()[name = string("x_143_dilations_0"), val = tensor([1, 1])]; + int32 x_143_groups_0 = const()[name = string("x_143_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543667584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549639616))))[name = string("model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("x_143_cast_fp16")]; + string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; + tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; + tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; + int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549860864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555832896))))[name = string("model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_17_cast_fp16 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("b_17_cast_fp16")]; + string var_8301_mode_0 = const()[name = string("op_8301_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_8301_cast_fp16 = gelu(mode = var_8301_mode_0, x = x_143_cast_fp16)[name = string("op_8301_cast_fp16")]; + tensor input_177_cast_fp16 = mul(x = var_8301_cast_fp16, y = b_17_cast_fp16)[name = string("input_177_cast_fp16")]; + string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; + tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; + tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; + int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; + tensor model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556054144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562026176))))[name = string("model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_17_cast_fp16 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("e_17_cast_fp16")]; + tensor var_8309_axes_0 = const()[name = string("op_8309_axes_0"), val = tensor([2])]; + tensor var_8309_cast_fp16 = squeeze(axes = var_8309_axes_0, x = e_17_cast_fp16)[name = string("op_8309_cast_fp16")]; + tensor var_8310 = const()[name = string("op_8310"), val = tensor([0, 2, 1])]; + int32 var_8321 = const()[name = string("op_8321"), val = int32(-1)]; + fp16 const_375_promoted_to_fp16 = const()[name = string("const_375_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_145_cast_fp16 = transpose(perm = var_8310, x = var_8309_cast_fp16)[name = string("transpose_153")]; + tensor var_8323_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = const_375_promoted_to_fp16)[name = string("op_8323_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_8321, interleave = input_179_interleave_0, values = (hidden_states_145_cast_fp16, var_8323_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_8318_to_fp16 = const()[name = string("op_8318_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_8318_to_fp16, x = input_179_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_215_cast_fp16 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215_cast_fp16")]; + tensor var_8337_to_fp16 = const()[name = string("op_8337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562063104)))]; + tensor hidden_states_147_cast_fp16 = mul(x = normed_215_cast_fp16, y = var_8337_to_fp16)[name = string("hidden_states_147_cast_fp16")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_143_cast_fp16, y = hidden_states_147_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + int32 var_8391 = const()[name = string("op_8391"), val = int32(-1)]; + fp16 const_380_promoted_to_fp16 = const()[name = string("const_380_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8393_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = const_380_promoted_to_fp16)[name = string("op_8393_cast_fp16")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181_cast_fp16 = concat(axis = var_8391, interleave = input_181_interleave_0, values = (hidden_states_149_cast_fp16, var_8393_cast_fp16))[name = string("input_181_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_8388_to_fp16 = const()[name = string("op_8388_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_8388_to_fp16, x = input_181_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_219_cast_fp16 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219_cast_fp16")]; + tensor var_8407_to_fp16 = const()[name = string("op_8407_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562065472)))]; + tensor hidden_states_151_cast_fp16 = mul(x = normed_219_cast_fp16, y = var_8407_to_fp16)[name = string("hidden_states_151_cast_fp16")]; + tensor var_8418 = const()[name = string("op_8418"), val = tensor([0, 2, 1])]; + tensor var_8421_axes_0 = const()[name = string("op_8421_axes_0"), val = tensor([2])]; + tensor var_8419_cast_fp16 = transpose(perm = var_8418, x = hidden_states_151_cast_fp16)[name = string("transpose_152")]; + tensor var_8421_cast_fp16 = expand_dims(axes = var_8421_axes_0, x = var_8419_cast_fp16)[name = string("op_8421_cast_fp16")]; + string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; + tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; + tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; + int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; + tensor query_states_73 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_8421_cast_fp16)[name = string("query_states_73")]; + string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; + tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; + tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; + int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; + tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_8421_cast_fp16)[name = string("key_states_91")]; + string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; + tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; + tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; + int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; + tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_8421_cast_fp16)[name = string("value_states_73")]; + tensor var_8463 = const()[name = string("op_8463"), val = tensor([1, 4, 256, 64])]; + tensor var_8464 = reshape(shape = var_8463, x = query_states_73)[name = string("op_8464")]; + tensor var_8469 = const()[name = string("op_8469"), val = tensor([0, 1, 3, 2])]; + tensor var_8474 = const()[name = string("op_8474"), val = tensor([1, 1, 256, 64])]; + tensor var_8475 = reshape(shape = var_8474, x = key_states_91)[name = string("op_8475")]; + tensor var_8480 = const()[name = string("op_8480"), val = tensor([0, 1, 3, 2])]; + tensor var_8485 = const()[name = string("op_8485"), val = tensor([1, 1, 256, 64])]; + tensor var_8486 = reshape(shape = var_8485, x = value_states_73)[name = string("op_8486")]; + tensor var_8491 = const()[name = string("op_8491"), val = tensor([0, 1, 3, 2])]; + int32 var_8502 = const()[name = string("op_8502"), val = int32(-1)]; + fp16 const_385_promoted = const()[name = string("const_385_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_153 = transpose(perm = var_8469, x = var_8464)[name = string("transpose_151")]; + tensor var_8504 = mul(x = hidden_states_153, y = const_385_promoted)[name = string("op_8504")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_8502, interleave = input_185_interleave_0, values = (hidden_states_153, var_8504))[name = string("input_185")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_8499_to_fp16 = const()[name = string("op_8499_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_8499_to_fp16, x = input_185)[name = string("normed_221_cast_fp16")]; + tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_223 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223")]; + tensor var_8518_to_fp16 = const()[name = string("op_8518_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562067840)))]; + tensor q_19_cast_fp16 = mul(x = normed_223, y = var_8518_to_fp16)[name = string("q_19_cast_fp16")]; + int32 var_8529 = const()[name = string("op_8529"), val = int32(-1)]; + fp16 const_389_promoted = const()[name = string("const_389_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_155 = transpose(perm = var_8480, x = var_8475)[name = string("transpose_150")]; + tensor var_8531 = mul(x = hidden_states_155, y = const_389_promoted)[name = string("op_8531")]; + bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; + tensor input_187 = concat(axis = var_8529, interleave = input_187_interleave_0, values = (hidden_states_155, var_8531))[name = string("input_187")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_8526_to_fp16 = const()[name = string("op_8526_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_8526_to_fp16, x = input_187)[name = string("normed_225_cast_fp16")]; + tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_227 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227")]; + tensor var_8545_to_fp16 = const()[name = string("op_8545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562068416)))]; + tensor k_19_cast_fp16 = mul(x = normed_227, y = var_8545_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_8559_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_5)[name = string("op_8559_cast_fp16")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; + fp16 const_395_promoted_to_fp16 = const()[name = string("const_395_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8580_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_395_promoted_to_fp16)[name = string("op_8580_cast_fp16")]; + int32 var_8582 = const()[name = string("op_8582"), val = int32(-1)]; + bool var_8583_interleave_0 = const()[name = string("op_8583_interleave_0"), val = bool(false)]; + tensor var_8583_cast_fp16 = concat(axis = var_8582, interleave = var_8583_interleave_0, values = (var_8580_cast_fp16, x1_37_cast_fp16))[name = string("op_8583_cast_fp16")]; + tensor var_8584_cast_fp16 = mul(x = var_8583_cast_fp16, y = sin_5)[name = string("op_8584_cast_fp16")]; + tensor query_states_75_cast_fp16 = add(x = var_8559_cast_fp16, y = var_8584_cast_fp16)[name = string("query_states_75_cast_fp16")]; + tensor var_8587_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_5)[name = string("op_8587_cast_fp16")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; + fp16 const_398_promoted_to_fp16 = const()[name = string("const_398_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8608_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_398_promoted_to_fp16)[name = string("op_8608_cast_fp16")]; + int32 var_8610 = const()[name = string("op_8610"), val = int32(-1)]; + bool var_8611_interleave_0 = const()[name = string("op_8611_interleave_0"), val = bool(false)]; + tensor var_8611_cast_fp16 = concat(axis = var_8610, interleave = var_8611_interleave_0, values = (var_8608_cast_fp16, x1_39_cast_fp16))[name = string("op_8611_cast_fp16")]; + tensor var_8612_cast_fp16 = mul(x = var_8611_cast_fp16, y = sin_5)[name = string("op_8612_cast_fp16")]; + tensor key_states_93_cast_fp16 = add(x = var_8587_cast_fp16, y = var_8612_cast_fp16)[name = string("key_states_93_cast_fp16")]; + tensor key_slice_17_begin_0 = const()[name = string("key_slice_17_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor key_slice_17_end_0 = const()[name = string("key_slice_17_end_0"), val = tensor([9, 1, 512, 256])]; + tensor key_slice_17_end_mask_0 = const()[name = string("key_slice_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_17_cast_fp16 = slice_by_index(begin = key_slice_17_begin_0, end = key_slice_17_end_0, end_mask = key_slice_17_end_mask_0, x = coreml_update_state_69)[name = string("key_slice_17_cast_fp16")]; + tensor var_8649_begin_0 = const()[name = string("op_8649_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_8649_end_0 = const()[name = string("op_8649_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_8649_end_mask_0 = const()[name = string("op_8649_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8649_cast_fp16 = slice_by_index(begin = var_8649_begin_0, end = var_8649_end_0, end_mask = var_8649_end_mask_0, x = key_slice_17_cast_fp16)[name = string("op_8649_cast_fp16")]; + int32 var_8676 = const()[name = string("op_8676"), val = int32(2)]; + bool shifted_key_17_interleave_0 = const()[name = string("shifted_key_17_interleave_0"), val = bool(false)]; + tensor shifted_key_17_cast_fp16 = concat(axis = var_8676, interleave = shifted_key_17_interleave_0, values = (var_8649_cast_fp16, key_states_93_cast_fp16))[name = string("shifted_key_17_cast_fp16")]; + tensor concat_126 = const()[name = string("concat_126"), val = tensor([8, 0, 0, 0])]; + tensor concat_127 = const()[name = string("concat_127"), val = tensor([9, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_126, begin_mask = model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0, end = concat_127, end_mask = model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_17_stride_0, update = shifted_key_17_cast_fp16, x = coreml_update_state_69)[name = string("model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_174_write_state")]; + tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_174")]; + tensor value_slice_17_begin_0 = const()[name = string("value_slice_17_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor value_slice_17_end_0 = const()[name = string("value_slice_17_end_0"), val = tensor([31, 1, 512, 256])]; + tensor value_slice_17_end_mask_0 = const()[name = string("value_slice_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_17_cast_fp16 = slice_by_index(begin = value_slice_17_begin_0, end = value_slice_17_end_0, end_mask = value_slice_17_end_mask_0, x = coreml_update_state_70)[name = string("value_slice_17_cast_fp16")]; + tensor var_8719_begin_0 = const()[name = string("op_8719_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_8719_end_0 = const()[name = string("op_8719_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_8719_end_mask_0 = const()[name = string("op_8719_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8719_cast_fp16 = slice_by_index(begin = var_8719_begin_0, end = var_8719_end_0, end_mask = var_8719_end_mask_0, x = value_slice_17_cast_fp16)[name = string("op_8719_cast_fp16")]; + int32 var_8746 = const()[name = string("op_8746"), val = int32(2)]; + bool shifted_value_17_interleave_0 = const()[name = string("shifted_value_17_interleave_0"), val = bool(false)]; + tensor value_states_75 = transpose(perm = var_8491, x = var_8486)[name = string("transpose_149")]; + tensor shifted_value_17_cast_fp16 = concat(axis = var_8746, interleave = shifted_value_17_interleave_0, values = (var_8719_cast_fp16, value_states_75))[name = string("shifted_value_17_cast_fp16")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([30, 0, 0, 0])]; + tensor concat_129 = const()[name = string("concat_129"), val = tensor([31, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_18_stride_0, update = shifted_value_17_cast_fp16, x = coreml_update_state_70)[name = string("model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_175_write_state")]; + tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_175")]; + tensor var_8774_begin_0 = const()[name = string("op_8774_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_8774_end_0 = const()[name = string("op_8774_end_0"), val = tensor([9, 1, 512, 256])]; + tensor var_8774_end_mask_0 = const()[name = string("op_8774_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8774_cast_fp16 = slice_by_index(begin = var_8774_begin_0, end = var_8774_end_0, end_mask = var_8774_end_mask_0, x = coreml_update_state_71)[name = string("op_8774_cast_fp16")]; + tensor var_8781_begin_0 = const()[name = string("op_8781_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_8781_end_0 = const()[name = string("op_8781_end_0"), val = tensor([31, 1, 512, 256])]; + tensor var_8781_end_mask_0 = const()[name = string("op_8781_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8781_cast_fp16 = slice_by_index(begin = var_8781_begin_0, end = var_8781_end_0, end_mask = var_8781_end_mask_0, x = coreml_update_state_71)[name = string("op_8781_cast_fp16")]; + tensor var_8820 = const()[name = string("op_8820"), val = tensor([1, 4, 1, 1])]; + tensor x_149_cast_fp16 = tile(reps = var_8820, x = var_8774_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_8840 = const()[name = string("op_8840"), val = tensor([1, 4, 1, 1])]; + tensor x_155_cast_fp16 = tile(reps = var_8840, x = var_8781_cast_fp16)[name = string("x_155_cast_fp16")]; + bool var_8867_transpose_x_0 = const()[name = string("op_8867_transpose_x_0"), val = bool(false)]; + bool var_8867_transpose_y_0 = const()[name = string("op_8867_transpose_y_0"), val = bool(true)]; + tensor var_8867 = matmul(transpose_x = var_8867_transpose_x_0, transpose_y = var_8867_transpose_y_0, x = query_states_75_cast_fp16, y = x_149_cast_fp16)[name = string("op_8867")]; + fp16 var_8868_to_fp16 = const()[name = string("op_8868_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_37_cast_fp16 = mul(x = var_8867, y = var_8868_to_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = mask_slice_1)[name = string("attn_weights_39_cast_fp16")]; + int32 var_8903 = const()[name = string("op_8903"), val = int32(-1)]; + tensor var_8905_cast_fp16 = softmax(axis = var_8903, x = attn_weights_39_cast_fp16)[name = string("op_8905_cast_fp16")]; + tensor concat_134 = const()[name = string("concat_134"), val = tensor([4, 64, 512])]; + tensor reshape_27_cast_fp16 = reshape(shape = concat_134, x = var_8905_cast_fp16)[name = string("reshape_27_cast_fp16")]; + tensor concat_135 = const()[name = string("concat_135"), val = tensor([4, 512, 256])]; + tensor reshape_28_cast_fp16 = reshape(shape = concat_135, x = x_155_cast_fp16)[name = string("reshape_28_cast_fp16")]; + bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; + bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; + tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; + tensor concat_139 = const()[name = string("concat_139"), val = tensor([1, 4, 64, 256])]; + tensor reshape_29_cast_fp16 = reshape(shape = concat_139, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; + tensor var_8917_perm_0 = const()[name = string("op_8917_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8936 = const()[name = string("op_8936"), val = tensor([1, 64, 1024])]; + tensor var_8917_cast_fp16 = transpose(perm = var_8917_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_148")]; + tensor attn_output_95_cast_fp16 = reshape(shape = var_8936, x = var_8917_cast_fp16)[name = string("attn_output_95_cast_fp16")]; + tensor var_8941 = const()[name = string("op_8941"), val = tensor([0, 2, 1])]; + string var_8957_pad_type_0 = const()[name = string("op_8957_pad_type_0"), val = string("valid")]; + int32 var_8957_groups_0 = const()[name = string("op_8957_groups_0"), val = int32(1)]; + tensor var_8957_strides_0 = const()[name = string("op_8957_strides_0"), val = tensor([1])]; + tensor var_8957_pad_0 = const()[name = string("op_8957_pad_0"), val = tensor([0, 0])]; + tensor var_8957_dilations_0 = const()[name = string("op_8957_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562068992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562953792))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8942_cast_fp16 = transpose(perm = var_8941, x = attn_output_95_cast_fp16)[name = string("transpose_147")]; + tensor var_8957_cast_fp16 = conv(dilations = var_8957_dilations_0, groups = var_8957_groups_0, pad = var_8957_pad_0, pad_type = var_8957_pad_type_0, strides = var_8957_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_8942_cast_fp16)[name = string("op_8957_cast_fp16")]; + tensor var_8961 = const()[name = string("op_8961"), val = tensor([0, 2, 1])]; + int32 var_8972 = const()[name = string("op_8972"), val = int32(-1)]; + fp16 const_409_promoted_to_fp16 = const()[name = string("const_409_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_157_cast_fp16 = transpose(perm = var_8961, x = var_8957_cast_fp16)[name = string("transpose_146")]; + tensor var_8974_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = const_409_promoted_to_fp16)[name = string("op_8974_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_8972, interleave = input_191_interleave_0, values = (hidden_states_157_cast_fp16, var_8974_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_8969_to_fp16 = const()[name = string("op_8969_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_8969_to_fp16, x = input_191_cast_fp16)[name = string("normed_229_cast_fp16")]; + tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_231_cast_fp16 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231_cast_fp16")]; + tensor var_8988_to_fp16 = const()[name = string("op_8988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562990720)))]; + tensor attn_output_99_cast_fp16 = mul(x = normed_231_cast_fp16, y = var_8988_to_fp16)[name = string("attn_output_99_cast_fp16")]; + tensor hidden_states_159_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; + int32 var_9001 = const()[name = string("op_9001"), val = int32(-1)]; + fp16 const_413_promoted_to_fp16 = const()[name = string("const_413_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9003_cast_fp16 = mul(x = hidden_states_159_cast_fp16, y = const_413_promoted_to_fp16)[name = string("op_9003_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_9001, interleave = input_193_interleave_0, values = (hidden_states_159_cast_fp16, var_9003_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_8998_to_fp16 = const()[name = string("op_8998_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_8998_to_fp16, x = input_193_cast_fp16)[name = string("normed_233_cast_fp16")]; + tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_235_cast_fp16 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235_cast_fp16")]; + tensor var_9017_to_fp16 = const()[name = string("op_9017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562993088)))]; + tensor x_157_cast_fp16 = mul(x = normed_235_cast_fp16, y = var_9017_to_fp16)[name = string("x_157_cast_fp16")]; + tensor var_9029 = const()[name = string("op_9029"), val = tensor([0, 2, 1])]; + tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; + tensor var_9030_cast_fp16 = transpose(perm = var_9029, x = x_157_cast_fp16)[name = string("transpose_145")]; + tensor input_195_cast_fp16 = expand_dims(axes = input_195_axes_0, x = var_9030_cast_fp16)[name = string("input_195_cast_fp16")]; + string x_159_pad_type_0 = const()[name = string("x_159_pad_type_0"), val = string("valid")]; + tensor x_159_strides_0 = const()[name = string("x_159_strides_0"), val = tensor([1, 1])]; + tensor x_159_pad_0 = const()[name = string("x_159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_159_dilations_0 = const()[name = string("x_159_dilations_0"), val = tensor([1, 1])]; + int32 x_159_groups_0 = const()[name = string("x_159_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562995456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568967488))))[name = string("model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_159_cast_fp16 = conv(dilations = x_159_dilations_0, groups = x_159_groups_0, pad = x_159_pad_0, pad_type = x_159_pad_type_0, strides = x_159_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("x_159_cast_fp16")]; + string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; + tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; + tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; + int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569188736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575160768))))[name = string("model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_19_cast_fp16 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("b_19_cast_fp16")]; + string var_9055_mode_0 = const()[name = string("op_9055_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_9055_cast_fp16 = gelu(mode = var_9055_mode_0, x = x_159_cast_fp16)[name = string("op_9055_cast_fp16")]; + tensor input_197_cast_fp16 = mul(x = var_9055_cast_fp16, y = b_19_cast_fp16)[name = string("input_197_cast_fp16")]; + string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; + tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; + tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; + int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; + tensor model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575382016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581354048))))[name = string("model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_19_cast_fp16 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_197_cast_fp16)[name = string("e_19_cast_fp16")]; + tensor var_9063_axes_0 = const()[name = string("op_9063_axes_0"), val = tensor([2])]; + tensor var_9063_cast_fp16 = squeeze(axes = var_9063_axes_0, x = e_19_cast_fp16)[name = string("op_9063_cast_fp16")]; + tensor var_9064 = const()[name = string("op_9064"), val = tensor([0, 2, 1])]; + int32 var_9075 = const()[name = string("op_9075"), val = int32(-1)]; + fp16 const_417_promoted_to_fp16 = const()[name = string("const_417_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_161_cast_fp16 = transpose(perm = var_9064, x = var_9063_cast_fp16)[name = string("transpose_144")]; + tensor var_9077_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_417_promoted_to_fp16)[name = string("op_9077_cast_fp16")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199_cast_fp16 = concat(axis = var_9075, interleave = input_199_interleave_0, values = (hidden_states_161_cast_fp16, var_9077_cast_fp16))[name = string("input_199_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_9072_to_fp16 = const()[name = string("op_9072_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_9072_to_fp16, x = input_199_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; + tensor var_9091_to_fp16 = const()[name = string("op_9091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581390976)))]; + tensor hidden_states_163_cast_fp16 = mul(x = normed_239_cast_fp16, y = var_9091_to_fp16)[name = string("hidden_states_163_cast_fp16")]; + tensor hidden_states_165_cast_fp16 = add(x = hidden_states_159_cast_fp16, y = hidden_states_163_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; + int32 var_9145 = const()[name = string("op_9145"), val = int32(-1)]; + fp16 const_422_promoted_to_fp16 = const()[name = string("const_422_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9147_cast_fp16 = mul(x = hidden_states_165_cast_fp16, y = const_422_promoted_to_fp16)[name = string("op_9147_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_9145, interleave = input_201_interleave_0, values = (hidden_states_165_cast_fp16, var_9147_cast_fp16))[name = string("input_201_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_9142_to_fp16 = const()[name = string("op_9142_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_9142_to_fp16, x = input_201_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; + tensor var_9161_to_fp16 = const()[name = string("op_9161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581393344)))]; + tensor hidden_states_167_cast_fp16 = mul(x = normed_243_cast_fp16, y = var_9161_to_fp16)[name = string("hidden_states_167_cast_fp16")]; + tensor var_9172 = const()[name = string("op_9172"), val = tensor([0, 2, 1])]; + tensor var_9175_axes_0 = const()[name = string("op_9175_axes_0"), val = tensor([2])]; + tensor var_9173_cast_fp16 = transpose(perm = var_9172, x = hidden_states_167_cast_fp16)[name = string("transpose_143")]; + tensor var_9175_cast_fp16 = expand_dims(axes = var_9175_axes_0, x = var_9173_cast_fp16)[name = string("op_9175_cast_fp16")]; + string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; + tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; + tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; + int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; + tensor query_states_81 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_9175_cast_fp16)[name = string("query_states_81")]; + string key_states_101_pad_type_0 = const()[name = string("key_states_101_pad_type_0"), val = string("valid")]; + tensor key_states_101_strides_0 = const()[name = string("key_states_101_strides_0"), val = tensor([1, 1])]; + tensor key_states_101_pad_0 = const()[name = string("key_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_101_dilations_0 = const()[name = string("key_states_101_dilations_0"), val = tensor([1, 1])]; + int32 key_states_101_groups_0 = const()[name = string("key_states_101_groups_0"), val = int32(1)]; + tensor key_states_101 = conv(dilations = key_states_101_dilations_0, groups = key_states_101_groups_0, pad = key_states_101_pad_0, pad_type = key_states_101_pad_type_0, strides = key_states_101_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_9175_cast_fp16)[name = string("key_states_101")]; + string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; + tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; + tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; + int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; + tensor value_states_81 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_9175_cast_fp16)[name = string("value_states_81")]; + tensor var_9217 = const()[name = string("op_9217"), val = tensor([1, 4, 256, 64])]; + tensor var_9218 = reshape(shape = var_9217, x = query_states_81)[name = string("op_9218")]; + tensor var_9223 = const()[name = string("op_9223"), val = tensor([0, 1, 3, 2])]; + tensor var_9228 = const()[name = string("op_9228"), val = tensor([1, 1, 256, 64])]; + tensor var_9229 = reshape(shape = var_9228, x = key_states_101)[name = string("op_9229")]; + tensor var_9234 = const()[name = string("op_9234"), val = tensor([0, 1, 3, 2])]; + tensor var_9239 = const()[name = string("op_9239"), val = tensor([1, 1, 256, 64])]; + tensor var_9240 = reshape(shape = var_9239, x = value_states_81)[name = string("op_9240")]; + tensor var_9245 = const()[name = string("op_9245"), val = tensor([0, 1, 3, 2])]; + int32 var_9256 = const()[name = string("op_9256"), val = int32(-1)]; + fp16 const_427_promoted = const()[name = string("const_427_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_169 = transpose(perm = var_9223, x = var_9218)[name = string("transpose_142")]; + tensor var_9258 = mul(x = hidden_states_169, y = const_427_promoted)[name = string("op_9258")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205 = concat(axis = var_9256, interleave = input_205_interleave_0, values = (hidden_states_169, var_9258))[name = string("input_205")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_9253_to_fp16 = const()[name = string("op_9253_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_9253_to_fp16, x = input_205)[name = string("normed_245_cast_fp16")]; + tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; + tensor var_9272_to_fp16 = const()[name = string("op_9272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581395712)))]; + tensor q_21_cast_fp16 = mul(x = normed_247, y = var_9272_to_fp16)[name = string("q_21_cast_fp16")]; + int32 var_9283 = const()[name = string("op_9283"), val = int32(-1)]; + fp16 const_431_promoted = const()[name = string("const_431_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_171 = transpose(perm = var_9234, x = var_9229)[name = string("transpose_141")]; + tensor var_9285 = mul(x = hidden_states_171, y = const_431_promoted)[name = string("op_9285")]; + bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; + tensor input_207 = concat(axis = var_9283, interleave = input_207_interleave_0, values = (hidden_states_171, var_9285))[name = string("input_207")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_9280_to_fp16 = const()[name = string("op_9280_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_9280_to_fp16, x = input_207)[name = string("normed_249_cast_fp16")]; + tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; + tensor var_9299_to_fp16 = const()[name = string("op_9299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581396288)))]; + tensor k_21_cast_fp16 = mul(x = normed_251, y = var_9299_to_fp16)[name = string("k_21_cast_fp16")]; + tensor var_9313_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_5)[name = string("op_9313_cast_fp16")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; + fp16 const_437_promoted_to_fp16 = const()[name = string("const_437_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9334_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_437_promoted_to_fp16)[name = string("op_9334_cast_fp16")]; + int32 var_9336 = const()[name = string("op_9336"), val = int32(-1)]; + bool var_9337_interleave_0 = const()[name = string("op_9337_interleave_0"), val = bool(false)]; + tensor var_9337_cast_fp16 = concat(axis = var_9336, interleave = var_9337_interleave_0, values = (var_9334_cast_fp16, x1_41_cast_fp16))[name = string("op_9337_cast_fp16")]; + tensor var_9338_cast_fp16 = mul(x = var_9337_cast_fp16, y = sin_5)[name = string("op_9338_cast_fp16")]; + tensor query_states_83_cast_fp16 = add(x = var_9313_cast_fp16, y = var_9338_cast_fp16)[name = string("query_states_83_cast_fp16")]; + tensor var_9341_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_5)[name = string("op_9341_cast_fp16")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; + fp16 const_440_promoted_to_fp16 = const()[name = string("const_440_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9362_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_440_promoted_to_fp16)[name = string("op_9362_cast_fp16")]; + int32 var_9364 = const()[name = string("op_9364"), val = int32(-1)]; + bool var_9365_interleave_0 = const()[name = string("op_9365_interleave_0"), val = bool(false)]; + tensor var_9365_cast_fp16 = concat(axis = var_9364, interleave = var_9365_interleave_0, values = (var_9362_cast_fp16, x1_43_cast_fp16))[name = string("op_9365_cast_fp16")]; + tensor var_9366_cast_fp16 = mul(x = var_9365_cast_fp16, y = sin_5)[name = string("op_9366_cast_fp16")]; + tensor key_states_103_cast_fp16 = add(x = var_9341_cast_fp16, y = var_9366_cast_fp16)[name = string("key_states_103_cast_fp16")]; + tensor key_slice_19_begin_0 = const()[name = string("key_slice_19_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor key_slice_19_end_0 = const()[name = string("key_slice_19_end_0"), val = tensor([10, 1, 512, 256])]; + tensor key_slice_19_end_mask_0 = const()[name = string("key_slice_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_19_cast_fp16 = slice_by_index(begin = key_slice_19_begin_0, end = key_slice_19_end_0, end_mask = key_slice_19_end_mask_0, x = coreml_update_state_71)[name = string("key_slice_19_cast_fp16")]; + tensor var_9403_begin_0 = const()[name = string("op_9403_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_9403_end_0 = const()[name = string("op_9403_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_9403_end_mask_0 = const()[name = string("op_9403_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9403_cast_fp16 = slice_by_index(begin = var_9403_begin_0, end = var_9403_end_0, end_mask = var_9403_end_mask_0, x = key_slice_19_cast_fp16)[name = string("op_9403_cast_fp16")]; + int32 var_9430 = const()[name = string("op_9430"), val = int32(2)]; + bool shifted_key_19_interleave_0 = const()[name = string("shifted_key_19_interleave_0"), val = bool(false)]; + tensor shifted_key_19_cast_fp16 = concat(axis = var_9430, interleave = shifted_key_19_interleave_0, values = (var_9403_cast_fp16, key_states_103_cast_fp16))[name = string("shifted_key_19_cast_fp16")]; + tensor concat_140 = const()[name = string("concat_140"), val = tensor([9, 0, 0, 0])]; + tensor concat_141 = const()[name = string("concat_141"), val = tensor([10, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_140, begin_mask = model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0, end = concat_141, end_mask = model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_19_stride_0, update = shifted_key_19_cast_fp16, x = coreml_update_state_71)[name = string("model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_176_write_state")]; + tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_176")]; + tensor value_slice_19_begin_0 = const()[name = string("value_slice_19_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor value_slice_19_end_0 = const()[name = string("value_slice_19_end_0"), val = tensor([32, 1, 512, 256])]; + tensor value_slice_19_end_mask_0 = const()[name = string("value_slice_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_19_cast_fp16 = slice_by_index(begin = value_slice_19_begin_0, end = value_slice_19_end_0, end_mask = value_slice_19_end_mask_0, x = coreml_update_state_72)[name = string("value_slice_19_cast_fp16")]; + tensor var_9473_begin_0 = const()[name = string("op_9473_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_9473_end_0 = const()[name = string("op_9473_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_9473_end_mask_0 = const()[name = string("op_9473_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9473_cast_fp16 = slice_by_index(begin = var_9473_begin_0, end = var_9473_end_0, end_mask = var_9473_end_mask_0, x = value_slice_19_cast_fp16)[name = string("op_9473_cast_fp16")]; + int32 var_9500 = const()[name = string("op_9500"), val = int32(2)]; + bool shifted_value_19_interleave_0 = const()[name = string("shifted_value_19_interleave_0"), val = bool(false)]; + tensor value_states_83 = transpose(perm = var_9245, x = var_9240)[name = string("transpose_140")]; + tensor shifted_value_19_cast_fp16 = concat(axis = var_9500, interleave = shifted_value_19_interleave_0, values = (var_9473_cast_fp16, value_states_83))[name = string("shifted_value_19_cast_fp16")]; + tensor concat_142 = const()[name = string("concat_142"), val = tensor([31, 0, 0, 0])]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([32, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_142, begin_mask = model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0, end = concat_143, end_mask = model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_20_stride_0, update = shifted_value_19_cast_fp16, x = coreml_update_state_72)[name = string("model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_177_write_state")]; + tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_177")]; + tensor var_9528_begin_0 = const()[name = string("op_9528_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_9528_end_0 = const()[name = string("op_9528_end_0"), val = tensor([10, 1, 512, 256])]; + tensor var_9528_end_mask_0 = const()[name = string("op_9528_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9528_cast_fp16 = slice_by_index(begin = var_9528_begin_0, end = var_9528_end_0, end_mask = var_9528_end_mask_0, x = coreml_update_state_73)[name = string("op_9528_cast_fp16")]; + tensor var_9535_begin_0 = const()[name = string("op_9535_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_9535_end_0 = const()[name = string("op_9535_end_0"), val = tensor([32, 1, 512, 256])]; + tensor var_9535_end_mask_0 = const()[name = string("op_9535_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9535_cast_fp16 = slice_by_index(begin = var_9535_begin_0, end = var_9535_end_0, end_mask = var_9535_end_mask_0, x = coreml_update_state_73)[name = string("op_9535_cast_fp16")]; + tensor var_9574 = const()[name = string("op_9574"), val = tensor([1, 4, 1, 1])]; + tensor x_165_cast_fp16 = tile(reps = var_9574, x = var_9528_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor var_9594 = const()[name = string("op_9594"), val = tensor([1, 4, 1, 1])]; + tensor x_171_cast_fp16 = tile(reps = var_9594, x = var_9535_cast_fp16)[name = string("x_171_cast_fp16")]; + bool var_9621_transpose_x_0 = const()[name = string("op_9621_transpose_x_0"), val = bool(false)]; + bool var_9621_transpose_y_0 = const()[name = string("op_9621_transpose_y_0"), val = bool(true)]; + tensor var_9621 = matmul(transpose_x = var_9621_transpose_x_0, transpose_y = var_9621_transpose_y_0, x = query_states_83_cast_fp16, y = x_165_cast_fp16)[name = string("op_9621")]; + fp16 var_9622_to_fp16 = const()[name = string("op_9622_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_41_cast_fp16 = mul(x = var_9621, y = var_9622_to_fp16)[name = string("attn_weights_41_cast_fp16")]; + tensor attn_weights_43_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = mask_slice_1)[name = string("attn_weights_43_cast_fp16")]; + int32 var_9657 = const()[name = string("op_9657"), val = int32(-1)]; + tensor var_9659_cast_fp16 = softmax(axis = var_9657, x = attn_weights_43_cast_fp16)[name = string("op_9659_cast_fp16")]; + tensor concat_148 = const()[name = string("concat_148"), val = tensor([4, 64, 512])]; + tensor reshape_30_cast_fp16 = reshape(shape = concat_148, x = var_9659_cast_fp16)[name = string("reshape_30_cast_fp16")]; + tensor concat_149 = const()[name = string("concat_149"), val = tensor([4, 512, 256])]; + tensor reshape_31_cast_fp16 = reshape(shape = concat_149, x = x_171_cast_fp16)[name = string("reshape_31_cast_fp16")]; + bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; + bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; + tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; + tensor concat_153 = const()[name = string("concat_153"), val = tensor([1, 4, 64, 256])]; + tensor reshape_32_cast_fp16 = reshape(shape = concat_153, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; + tensor var_9671_perm_0 = const()[name = string("op_9671_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9690 = const()[name = string("op_9690"), val = tensor([1, 64, 1024])]; + tensor var_9671_cast_fp16 = transpose(perm = var_9671_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_139")]; + tensor attn_output_105_cast_fp16 = reshape(shape = var_9690, x = var_9671_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_9695 = const()[name = string("op_9695"), val = tensor([0, 2, 1])]; + string var_9711_pad_type_0 = const()[name = string("op_9711_pad_type_0"), val = string("valid")]; + int32 var_9711_groups_0 = const()[name = string("op_9711_groups_0"), val = int32(1)]; + tensor var_9711_strides_0 = const()[name = string("op_9711_strides_0"), val = tensor([1])]; + tensor var_9711_pad_0 = const()[name = string("op_9711_pad_0"), val = tensor([0, 0])]; + tensor var_9711_dilations_0 = const()[name = string("op_9711_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581396864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582281664))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9696_cast_fp16 = transpose(perm = var_9695, x = attn_output_105_cast_fp16)[name = string("transpose_138")]; + tensor var_9711_cast_fp16 = conv(dilations = var_9711_dilations_0, groups = var_9711_groups_0, pad = var_9711_pad_0, pad_type = var_9711_pad_type_0, strides = var_9711_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_9696_cast_fp16)[name = string("op_9711_cast_fp16")]; + tensor var_9715 = const()[name = string("op_9715"), val = tensor([0, 2, 1])]; + int32 var_9726 = const()[name = string("op_9726"), val = int32(-1)]; + fp16 const_451_promoted_to_fp16 = const()[name = string("const_451_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_173_cast_fp16 = transpose(perm = var_9715, x = var_9711_cast_fp16)[name = string("transpose_137")]; + tensor var_9728_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = const_451_promoted_to_fp16)[name = string("op_9728_cast_fp16")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211_cast_fp16 = concat(axis = var_9726, interleave = input_211_interleave_0, values = (hidden_states_173_cast_fp16, var_9728_cast_fp16))[name = string("input_211_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_9723_to_fp16 = const()[name = string("op_9723_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_9723_to_fp16, x = input_211_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; + tensor var_9742_to_fp16 = const()[name = string("op_9742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582318592)))]; + tensor attn_output_109_cast_fp16 = mul(x = normed_255_cast_fp16, y = var_9742_to_fp16)[name = string("attn_output_109_cast_fp16")]; + tensor hidden_states_175_cast_fp16 = add(x = hidden_states_165_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_175_cast_fp16")]; + int32 var_9755 = const()[name = string("op_9755"), val = int32(-1)]; + fp16 const_455_promoted_to_fp16 = const()[name = string("const_455_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9757_cast_fp16 = mul(x = hidden_states_175_cast_fp16, y = const_455_promoted_to_fp16)[name = string("op_9757_cast_fp16")]; + bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; + tensor input_213_cast_fp16 = concat(axis = var_9755, interleave = input_213_interleave_0, values = (hidden_states_175_cast_fp16, var_9757_cast_fp16))[name = string("input_213_cast_fp16")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_9752_to_fp16 = const()[name = string("op_9752_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_9752_to_fp16, x = input_213_cast_fp16)[name = string("normed_257_cast_fp16")]; + tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; + tensor var_9771_to_fp16 = const()[name = string("op_9771_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582320960)))]; + tensor x_173_cast_fp16 = mul(x = normed_259_cast_fp16, y = var_9771_to_fp16)[name = string("x_173_cast_fp16")]; + tensor var_9783 = const()[name = string("op_9783"), val = tensor([0, 2, 1])]; + tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; + tensor var_9784_cast_fp16 = transpose(perm = var_9783, x = x_173_cast_fp16)[name = string("transpose_136")]; + tensor input_215_cast_fp16 = expand_dims(axes = input_215_axes_0, x = var_9784_cast_fp16)[name = string("input_215_cast_fp16")]; + string x_175_pad_type_0 = const()[name = string("x_175_pad_type_0"), val = string("valid")]; + tensor x_175_strides_0 = const()[name = string("x_175_strides_0"), val = tensor([1, 1])]; + tensor x_175_pad_0 = const()[name = string("x_175_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_175_dilations_0 = const()[name = string("x_175_dilations_0"), val = tensor([1, 1])]; + int32 x_175_groups_0 = const()[name = string("x_175_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582323328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588295360))))[name = string("model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("x_175_cast_fp16")]; + string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; + tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; + tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; + int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588516608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594488640))))[name = string("model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_21_cast_fp16 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("b_21_cast_fp16")]; + string var_9809_mode_0 = const()[name = string("op_9809_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_9809_cast_fp16 = gelu(mode = var_9809_mode_0, x = x_175_cast_fp16)[name = string("op_9809_cast_fp16")]; + tensor input_217_cast_fp16 = mul(x = var_9809_cast_fp16, y = b_21_cast_fp16)[name = string("input_217_cast_fp16")]; + string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; + tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; + tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; + int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; + tensor model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594709888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600681920))))[name = string("model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_21_cast_fp16 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_217_cast_fp16)[name = string("e_21_cast_fp16")]; + tensor var_9817_axes_0 = const()[name = string("op_9817_axes_0"), val = tensor([2])]; + tensor var_9817_cast_fp16 = squeeze(axes = var_9817_axes_0, x = e_21_cast_fp16)[name = string("op_9817_cast_fp16")]; + tensor var_9818 = const()[name = string("op_9818"), val = tensor([0, 2, 1])]; + int32 var_9829 = const()[name = string("op_9829"), val = int32(-1)]; + fp16 const_459_promoted_to_fp16 = const()[name = string("const_459_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_177_cast_fp16 = transpose(perm = var_9818, x = var_9817_cast_fp16)[name = string("transpose_135")]; + tensor var_9831_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = const_459_promoted_to_fp16)[name = string("op_9831_cast_fp16")]; + bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; + tensor input_219_cast_fp16 = concat(axis = var_9829, interleave = input_219_interleave_0, values = (hidden_states_177_cast_fp16, var_9831_cast_fp16))[name = string("input_219_cast_fp16")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_9826_to_fp16 = const()[name = string("op_9826_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_9826_to_fp16, x = input_219_cast_fp16)[name = string("normed_261_cast_fp16")]; + tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_263_cast_fp16 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263_cast_fp16")]; + tensor var_9845_to_fp16 = const()[name = string("op_9845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600718848)))]; + tensor hidden_states_179_cast_fp16 = mul(x = normed_263_cast_fp16, y = var_9845_to_fp16)[name = string("hidden_states_179_cast_fp16")]; + tensor hidden_states_181_cast_fp16 = add(x = hidden_states_175_cast_fp16, y = hidden_states_179_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; + int32 var_9899 = const()[name = string("op_9899"), val = int32(-1)]; + fp16 const_464_promoted_to_fp16 = const()[name = string("const_464_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9901_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = const_464_promoted_to_fp16)[name = string("op_9901_cast_fp16")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221_cast_fp16 = concat(axis = var_9899, interleave = input_221_interleave_0, values = (hidden_states_181_cast_fp16, var_9901_cast_fp16))[name = string("input_221_cast_fp16")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_9896_to_fp16 = const()[name = string("op_9896_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_9896_to_fp16, x = input_221_cast_fp16)[name = string("normed_265_cast_fp16")]; + tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_267_cast_fp16 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267_cast_fp16")]; + tensor var_9915_to_fp16 = const()[name = string("op_9915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600721216)))]; + tensor hidden_states_183_cast_fp16 = mul(x = normed_267_cast_fp16, y = var_9915_to_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor var_9926 = const()[name = string("op_9926"), val = tensor([0, 2, 1])]; + tensor var_9929_axes_0 = const()[name = string("op_9929_axes_0"), val = tensor([2])]; + tensor var_9927_cast_fp16 = transpose(perm = var_9926, x = hidden_states_183_cast_fp16)[name = string("transpose_134")]; + tensor var_9929_cast_fp16 = expand_dims(axes = var_9929_axes_0, x = var_9927_cast_fp16)[name = string("op_9929_cast_fp16")]; + string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; + tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; + tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; + int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; + tensor query_states_89 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_9929_cast_fp16)[name = string("query_states_89")]; + string key_states_111_pad_type_0 = const()[name = string("key_states_111_pad_type_0"), val = string("valid")]; + tensor key_states_111_strides_0 = const()[name = string("key_states_111_strides_0"), val = tensor([1, 1])]; + tensor key_states_111_pad_0 = const()[name = string("key_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_111_dilations_0 = const()[name = string("key_states_111_dilations_0"), val = tensor([1, 1])]; + int32 key_states_111_groups_0 = const()[name = string("key_states_111_groups_0"), val = int32(1)]; + tensor key_states_111 = conv(dilations = key_states_111_dilations_0, groups = key_states_111_groups_0, pad = key_states_111_pad_0, pad_type = key_states_111_pad_type_0, strides = key_states_111_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_9929_cast_fp16)[name = string("key_states_111")]; + string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; + tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; + tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; + int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; + tensor value_states_89 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_9929_cast_fp16)[name = string("value_states_89")]; + tensor var_9971 = const()[name = string("op_9971"), val = tensor([1, 4, 256, 64])]; + tensor var_9972 = reshape(shape = var_9971, x = query_states_89)[name = string("op_9972")]; + tensor var_9977 = const()[name = string("op_9977"), val = tensor([0, 1, 3, 2])]; + tensor var_9982 = const()[name = string("op_9982"), val = tensor([1, 1, 256, 64])]; + tensor var_9983 = reshape(shape = var_9982, x = key_states_111)[name = string("op_9983")]; + tensor var_9988 = const()[name = string("op_9988"), val = tensor([0, 1, 3, 2])]; + tensor var_9993 = const()[name = string("op_9993"), val = tensor([1, 1, 256, 64])]; + tensor var_9994 = reshape(shape = var_9993, x = value_states_89)[name = string("op_9994")]; + tensor var_9999 = const()[name = string("op_9999"), val = tensor([0, 1, 3, 2])]; + int32 var_10010 = const()[name = string("op_10010"), val = int32(-1)]; + fp16 const_469_promoted = const()[name = string("const_469_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_185 = transpose(perm = var_9977, x = var_9972)[name = string("transpose_133")]; + tensor var_10012 = mul(x = hidden_states_185, y = const_469_promoted)[name = string("op_10012")]; + bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; + tensor input_225 = concat(axis = var_10010, interleave = input_225_interleave_0, values = (hidden_states_185, var_10012))[name = string("input_225")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_10007_to_fp16 = const()[name = string("op_10007_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_10007_to_fp16, x = input_225)[name = string("normed_269_cast_fp16")]; + tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_271 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271")]; + tensor var_10026_to_fp16 = const()[name = string("op_10026_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600723584)))]; + tensor q_23_cast_fp16 = mul(x = normed_271, y = var_10026_to_fp16)[name = string("q_23_cast_fp16")]; + int32 var_10037 = const()[name = string("op_10037"), val = int32(-1)]; + fp16 const_473_promoted = const()[name = string("const_473_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_187 = transpose(perm = var_9988, x = var_9983)[name = string("transpose_132")]; + tensor var_10039 = mul(x = hidden_states_187, y = const_473_promoted)[name = string("op_10039")]; + bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; + tensor input_227 = concat(axis = var_10037, interleave = input_227_interleave_0, values = (hidden_states_187, var_10039))[name = string("input_227")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_10034_to_fp16 = const()[name = string("op_10034_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_10034_to_fp16, x = input_227)[name = string("normed_273_cast_fp16")]; + tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_275 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275")]; + tensor var_10053_to_fp16 = const()[name = string("op_10053_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600724160)))]; + tensor k_23_cast_fp16 = mul(x = normed_275, y = var_10053_to_fp16)[name = string("k_23_cast_fp16")]; + tensor var_10067_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_35)[name = string("op_10067_cast_fp16")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; + fp16 const_479_promoted_to_fp16 = const()[name = string("const_479_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10088_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_479_promoted_to_fp16)[name = string("op_10088_cast_fp16")]; + int32 var_10090 = const()[name = string("op_10090"), val = int32(-1)]; + bool var_10091_interleave_0 = const()[name = string("op_10091_interleave_0"), val = bool(false)]; + tensor var_10091_cast_fp16 = concat(axis = var_10090, interleave = var_10091_interleave_0, values = (var_10088_cast_fp16, x1_45_cast_fp16))[name = string("op_10091_cast_fp16")]; + tensor var_10092_cast_fp16 = mul(x = var_10091_cast_fp16, y = sin_35)[name = string("op_10092_cast_fp16")]; + tensor query_states_91_cast_fp16 = add(x = var_10067_cast_fp16, y = var_10092_cast_fp16)[name = string("query_states_91_cast_fp16")]; + tensor var_10095_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_35)[name = string("op_10095_cast_fp16")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; + fp16 const_482_promoted_to_fp16 = const()[name = string("const_482_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10116_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_482_promoted_to_fp16)[name = string("op_10116_cast_fp16")]; + int32 var_10118 = const()[name = string("op_10118"), val = int32(-1)]; + bool var_10119_interleave_0 = const()[name = string("op_10119_interleave_0"), val = bool(false)]; + tensor var_10119_cast_fp16 = concat(axis = var_10118, interleave = var_10119_interleave_0, values = (var_10116_cast_fp16, x1_47_cast_fp16))[name = string("op_10119_cast_fp16")]; + tensor var_10120_cast_fp16 = mul(x = var_10119_cast_fp16, y = sin_35)[name = string("op_10120_cast_fp16")]; + tensor key_states_113_cast_fp16 = add(x = var_10095_cast_fp16, y = var_10120_cast_fp16)[name = string("key_states_113_cast_fp16")]; + tensor concat_154 = const()[name = string("concat_154"), val = tensor([1, 0, 0, 0])]; + tensor concat_155 = const()[name = string("concat_155"), val = tensor([2, 0, 64, 0])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_154, begin_mask = model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0, end = concat_155, end_mask = model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_3_stride_0, update = key_states_113_cast_fp16, x = coreml_update_state_63)[name = string("model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_178_write_state")]; + tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_178")]; + tensor concat_156 = const()[name = string("concat_156"), val = tensor([5, 0, 0, 0])]; + tensor concat_157 = const()[name = string("concat_157"), val = tensor([6, 0, 64, 0])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_91 = transpose(perm = var_9999, x = var_9994)[name = string("transpose_131")]; + tensor model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_156, begin_mask = model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0, end = concat_157, end_mask = model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_4_stride_0, update = value_states_91, x = coreml_update_state_74)[name = string("model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_179_write_state")]; + tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_179")]; + tensor var_10219_begin_0 = const()[name = string("op_10219_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_10219_end_0 = const()[name = string("op_10219_end_0"), val = tensor([2, 1, 4096, 256])]; + tensor var_10219_end_mask_0 = const()[name = string("op_10219_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10219_cast_fp16 = slice_by_index(begin = var_10219_begin_0, end = var_10219_end_0, end_mask = var_10219_end_mask_0, x = coreml_update_state_75)[name = string("op_10219_cast_fp16")]; + tensor var_10226_begin_0 = const()[name = string("op_10226_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_10226_end_0 = const()[name = string("op_10226_end_0"), val = tensor([6, 1, 4096, 256])]; + tensor var_10226_end_mask_0 = const()[name = string("op_10226_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10226_cast_fp16 = slice_by_index(begin = var_10226_begin_0, end = var_10226_end_0, end_mask = var_10226_end_mask_0, x = coreml_update_state_75)[name = string("op_10226_cast_fp16")]; + tensor var_10265 = const()[name = string("op_10265"), val = tensor([1, 4, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_10265, x = var_10219_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_10285 = const()[name = string("op_10285"), val = tensor([1, 4, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_10285, x = var_10226_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_10312_transpose_x_0 = const()[name = string("op_10312_transpose_x_0"), val = bool(false)]; + bool var_10312_transpose_y_0 = const()[name = string("op_10312_transpose_y_0"), val = bool(true)]; + tensor var_10312 = matmul(transpose_x = var_10312_transpose_x_0, transpose_y = var_10312_transpose_y_0, x = query_states_91_cast_fp16, y = x_181_cast_fp16)[name = string("op_10312")]; + fp16 var_10313_to_fp16 = const()[name = string("op_10313_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_45_cast_fp16 = mul(x = var_10312, y = var_10313_to_fp16)[name = string("attn_weights_45_cast_fp16")]; + tensor attn_weights_47_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("attn_weights_47_cast_fp16")]; + int32 var_10348 = const()[name = string("op_10348"), val = int32(-1)]; + tensor var_10350_cast_fp16 = softmax(axis = var_10348, x = attn_weights_47_cast_fp16)[name = string("op_10350_cast_fp16")]; + tensor concat_162 = const()[name = string("concat_162"), val = tensor([4, 64, 4096])]; + tensor reshape_33_cast_fp16 = reshape(shape = concat_162, x = var_10350_cast_fp16)[name = string("reshape_33_cast_fp16")]; + tensor concat_163 = const()[name = string("concat_163"), val = tensor([4, 4096, 256])]; + tensor reshape_34_cast_fp16 = reshape(shape = concat_163, x = x_187_cast_fp16)[name = string("reshape_34_cast_fp16")]; + bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; + bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; + tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; + tensor concat_167 = const()[name = string("concat_167"), val = tensor([1, 4, 64, 256])]; + tensor reshape_35_cast_fp16 = reshape(shape = concat_167, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; + tensor var_10362_perm_0 = const()[name = string("op_10362_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10381 = const()[name = string("op_10381"), val = tensor([1, 64, 1024])]; + tensor var_10362_cast_fp16 = transpose(perm = var_10362_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_130")]; + tensor attn_output_115_cast_fp16 = reshape(shape = var_10381, x = var_10362_cast_fp16)[name = string("attn_output_115_cast_fp16")]; + tensor var_10386 = const()[name = string("op_10386"), val = tensor([0, 2, 1])]; + string var_10402_pad_type_0 = const()[name = string("op_10402_pad_type_0"), val = string("valid")]; + int32 var_10402_groups_0 = const()[name = string("op_10402_groups_0"), val = int32(1)]; + tensor var_10402_strides_0 = const()[name = string("op_10402_strides_0"), val = tensor([1])]; + tensor var_10402_pad_0 = const()[name = string("op_10402_pad_0"), val = tensor([0, 0])]; + tensor var_10402_dilations_0 = const()[name = string("op_10402_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600724736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601609536))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10387_cast_fp16 = transpose(perm = var_10386, x = attn_output_115_cast_fp16)[name = string("transpose_129")]; + tensor var_10402_cast_fp16 = conv(dilations = var_10402_dilations_0, groups = var_10402_groups_0, pad = var_10402_pad_0, pad_type = var_10402_pad_type_0, strides = var_10402_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_10387_cast_fp16)[name = string("op_10402_cast_fp16")]; + tensor var_10406 = const()[name = string("op_10406"), val = tensor([0, 2, 1])]; + int32 var_10417 = const()[name = string("op_10417"), val = int32(-1)]; + fp16 const_494_promoted_to_fp16 = const()[name = string("const_494_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_189_cast_fp16 = transpose(perm = var_10406, x = var_10402_cast_fp16)[name = string("transpose_128")]; + tensor var_10419_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_494_promoted_to_fp16)[name = string("op_10419_cast_fp16")]; + bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; + tensor input_231_cast_fp16 = concat(axis = var_10417, interleave = input_231_interleave_0, values = (hidden_states_189_cast_fp16, var_10419_cast_fp16))[name = string("input_231_cast_fp16")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_10414_to_fp16 = const()[name = string("op_10414_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_10414_to_fp16, x = input_231_cast_fp16)[name = string("normed_277_cast_fp16")]; + tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_279_cast_fp16 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279_cast_fp16")]; + tensor var_10433_to_fp16 = const()[name = string("op_10433_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601646464)))]; + tensor attn_output_119_cast_fp16 = mul(x = normed_279_cast_fp16, y = var_10433_to_fp16)[name = string("attn_output_119_cast_fp16")]; + tensor hidden_states_191_cast_fp16 = add(x = hidden_states_181_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + int32 var_10446 = const()[name = string("op_10446"), val = int32(-1)]; + fp16 const_498_promoted_to_fp16 = const()[name = string("const_498_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10448_cast_fp16 = mul(x = hidden_states_191_cast_fp16, y = const_498_promoted_to_fp16)[name = string("op_10448_cast_fp16")]; + bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; + tensor input_233_cast_fp16 = concat(axis = var_10446, interleave = input_233_interleave_0, values = (hidden_states_191_cast_fp16, var_10448_cast_fp16))[name = string("input_233_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_10443_to_fp16 = const()[name = string("op_10443_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_10443_to_fp16, x = input_233_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_283_cast_fp16 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283_cast_fp16")]; + tensor var_10462_to_fp16 = const()[name = string("op_10462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601648832)))]; + tensor x_189_cast_fp16 = mul(x = normed_283_cast_fp16, y = var_10462_to_fp16)[name = string("x_189_cast_fp16")]; + tensor var_10474 = const()[name = string("op_10474"), val = tensor([0, 2, 1])]; + tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; + tensor var_10475_cast_fp16 = transpose(perm = var_10474, x = x_189_cast_fp16)[name = string("transpose_127")]; + tensor input_235_cast_fp16 = expand_dims(axes = input_235_axes_0, x = var_10475_cast_fp16)[name = string("input_235_cast_fp16")]; + string x_191_pad_type_0 = const()[name = string("x_191_pad_type_0"), val = string("valid")]; + tensor x_191_strides_0 = const()[name = string("x_191_strides_0"), val = tensor([1, 1])]; + tensor x_191_pad_0 = const()[name = string("x_191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_191_dilations_0 = const()[name = string("x_191_dilations_0"), val = tensor([1, 1])]; + int32 x_191_groups_0 = const()[name = string("x_191_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601651200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607623232))))[name = string("model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("x_191_cast_fp16")]; + string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; + tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; + tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; + int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607844480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613816512))))[name = string("model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_23_cast_fp16 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("b_23_cast_fp16")]; + string var_10500_mode_0 = const()[name = string("op_10500_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_10500_cast_fp16 = gelu(mode = var_10500_mode_0, x = x_191_cast_fp16)[name = string("op_10500_cast_fp16")]; + tensor input_237_cast_fp16 = mul(x = var_10500_cast_fp16, y = b_23_cast_fp16)[name = string("input_237_cast_fp16")]; + string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; + tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; + tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; + int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; + tensor model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614037760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620009792))))[name = string("model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_23_cast_fp16 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("e_23_cast_fp16")]; + tensor var_10508_axes_0 = const()[name = string("op_10508_axes_0"), val = tensor([2])]; + tensor var_10508_cast_fp16 = squeeze(axes = var_10508_axes_0, x = e_23_cast_fp16)[name = string("op_10508_cast_fp16")]; + tensor var_10509 = const()[name = string("op_10509"), val = tensor([0, 2, 1])]; + int32 var_10520 = const()[name = string("op_10520"), val = int32(-1)]; + fp16 const_502_promoted_to_fp16 = const()[name = string("const_502_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_193_cast_fp16 = transpose(perm = var_10509, x = var_10508_cast_fp16)[name = string("transpose_126")]; + tensor var_10522_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = const_502_promoted_to_fp16)[name = string("op_10522_cast_fp16")]; + bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; + tensor input_239_cast_fp16 = concat(axis = var_10520, interleave = input_239_interleave_0, values = (hidden_states_193_cast_fp16, var_10522_cast_fp16))[name = string("input_239_cast_fp16")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_10517_to_fp16 = const()[name = string("op_10517_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_10517_to_fp16, x = input_239_cast_fp16)[name = string("normed_285_cast_fp16")]; + tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; + tensor var_10536_to_fp16 = const()[name = string("op_10536_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620046720)))]; + tensor hidden_states_195_cast_fp16 = mul(x = normed_287_cast_fp16, y = var_10536_to_fp16)[name = string("hidden_states_195_cast_fp16")]; + tensor hidden_states_197_cast_fp16 = add(x = hidden_states_191_cast_fp16, y = hidden_states_195_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; + int32 var_10590 = const()[name = string("op_10590"), val = int32(-1)]; + fp16 const_507_promoted_to_fp16 = const()[name = string("const_507_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10592_cast_fp16 = mul(x = hidden_states_197_cast_fp16, y = const_507_promoted_to_fp16)[name = string("op_10592_cast_fp16")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241_cast_fp16 = concat(axis = var_10590, interleave = input_241_interleave_0, values = (hidden_states_197_cast_fp16, var_10592_cast_fp16))[name = string("input_241_cast_fp16")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_10587_to_fp16 = const()[name = string("op_10587_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_10587_to_fp16, x = input_241_cast_fp16)[name = string("normed_289_cast_fp16")]; + tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; + tensor var_10606_to_fp16 = const()[name = string("op_10606_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620049088)))]; + tensor hidden_states_199_cast_fp16 = mul(x = normed_291_cast_fp16, y = var_10606_to_fp16)[name = string("hidden_states_199_cast_fp16")]; + tensor var_10617 = const()[name = string("op_10617"), val = tensor([0, 2, 1])]; + tensor var_10620_axes_0 = const()[name = string("op_10620_axes_0"), val = tensor([2])]; + tensor var_10618_cast_fp16 = transpose(perm = var_10617, x = hidden_states_199_cast_fp16)[name = string("transpose_125")]; + tensor var_10620_cast_fp16 = expand_dims(axes = var_10620_axes_0, x = var_10618_cast_fp16)[name = string("op_10620_cast_fp16")]; + string query_states_97_pad_type_0 = const()[name = string("query_states_97_pad_type_0"), val = string("valid")]; + tensor query_states_97_strides_0 = const()[name = string("query_states_97_strides_0"), val = tensor([1, 1])]; + tensor query_states_97_pad_0 = const()[name = string("query_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_97_dilations_0 = const()[name = string("query_states_97_dilations_0"), val = tensor([1, 1])]; + int32 query_states_97_groups_0 = const()[name = string("query_states_97_groups_0"), val = int32(1)]; + tensor query_states_97 = conv(dilations = query_states_97_dilations_0, groups = query_states_97_groups_0, pad = query_states_97_pad_0, pad_type = query_states_97_pad_type_0, strides = query_states_97_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_10620_cast_fp16)[name = string("query_states_97")]; + string key_states_121_pad_type_0 = const()[name = string("key_states_121_pad_type_0"), val = string("valid")]; + tensor key_states_121_strides_0 = const()[name = string("key_states_121_strides_0"), val = tensor([1, 1])]; + tensor key_states_121_pad_0 = const()[name = string("key_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_121_dilations_0 = const()[name = string("key_states_121_dilations_0"), val = tensor([1, 1])]; + int32 key_states_121_groups_0 = const()[name = string("key_states_121_groups_0"), val = int32(1)]; + tensor key_states_121 = conv(dilations = key_states_121_dilations_0, groups = key_states_121_groups_0, pad = key_states_121_pad_0, pad_type = key_states_121_pad_type_0, strides = key_states_121_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_10620_cast_fp16)[name = string("key_states_121")]; + string value_states_97_pad_type_0 = const()[name = string("value_states_97_pad_type_0"), val = string("valid")]; + tensor value_states_97_strides_0 = const()[name = string("value_states_97_strides_0"), val = tensor([1, 1])]; + tensor value_states_97_pad_0 = const()[name = string("value_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_97_dilations_0 = const()[name = string("value_states_97_dilations_0"), val = tensor([1, 1])]; + int32 value_states_97_groups_0 = const()[name = string("value_states_97_groups_0"), val = int32(1)]; + tensor value_states_97 = conv(dilations = value_states_97_dilations_0, groups = value_states_97_groups_0, pad = value_states_97_pad_0, pad_type = value_states_97_pad_type_0, strides = value_states_97_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_10620_cast_fp16)[name = string("value_states_97")]; + tensor var_10662 = const()[name = string("op_10662"), val = tensor([1, 4, 256, 64])]; + tensor var_10663 = reshape(shape = var_10662, x = query_states_97)[name = string("op_10663")]; + tensor var_10668 = const()[name = string("op_10668"), val = tensor([0, 1, 3, 2])]; + tensor var_10673 = const()[name = string("op_10673"), val = tensor([1, 1, 256, 64])]; + tensor var_10674 = reshape(shape = var_10673, x = key_states_121)[name = string("op_10674")]; + tensor var_10679 = const()[name = string("op_10679"), val = tensor([0, 1, 3, 2])]; + tensor var_10684 = const()[name = string("op_10684"), val = tensor([1, 1, 256, 64])]; + tensor var_10685 = reshape(shape = var_10684, x = value_states_97)[name = string("op_10685")]; + tensor var_10690 = const()[name = string("op_10690"), val = tensor([0, 1, 3, 2])]; + int32 var_10701 = const()[name = string("op_10701"), val = int32(-1)]; + fp16 const_512_promoted = const()[name = string("const_512_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_201 = transpose(perm = var_10668, x = var_10663)[name = string("transpose_124")]; + tensor var_10703 = mul(x = hidden_states_201, y = const_512_promoted)[name = string("op_10703")]; + bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; + tensor input_245 = concat(axis = var_10701, interleave = input_245_interleave_0, values = (hidden_states_201, var_10703))[name = string("input_245")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_10698_to_fp16 = const()[name = string("op_10698_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_10698_to_fp16, x = input_245)[name = string("normed_293_cast_fp16")]; + tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; + tensor var_10717_to_fp16 = const()[name = string("op_10717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620051456)))]; + tensor q_25_cast_fp16 = mul(x = normed_295, y = var_10717_to_fp16)[name = string("q_25_cast_fp16")]; + int32 var_10728 = const()[name = string("op_10728"), val = int32(-1)]; + fp16 const_516_promoted = const()[name = string("const_516_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_203 = transpose(perm = var_10679, x = var_10674)[name = string("transpose_123")]; + tensor var_10730 = mul(x = hidden_states_203, y = const_516_promoted)[name = string("op_10730")]; + bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; + tensor input_247 = concat(axis = var_10728, interleave = input_247_interleave_0, values = (hidden_states_203, var_10730))[name = string("input_247")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_10725_to_fp16 = const()[name = string("op_10725_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_10725_to_fp16, x = input_247)[name = string("normed_297_cast_fp16")]; + tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; + tensor var_10744_to_fp16 = const()[name = string("op_10744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620052032)))]; + tensor k_25_cast_fp16 = mul(x = normed_299, y = var_10744_to_fp16)[name = string("k_25_cast_fp16")]; + tensor var_10758_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_5)[name = string("op_10758_cast_fp16")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; + fp16 const_522_promoted_to_fp16 = const()[name = string("const_522_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10779_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_522_promoted_to_fp16)[name = string("op_10779_cast_fp16")]; + int32 var_10781 = const()[name = string("op_10781"), val = int32(-1)]; + bool var_10782_interleave_0 = const()[name = string("op_10782_interleave_0"), val = bool(false)]; + tensor var_10782_cast_fp16 = concat(axis = var_10781, interleave = var_10782_interleave_0, values = (var_10779_cast_fp16, x1_49_cast_fp16))[name = string("op_10782_cast_fp16")]; + tensor var_10783_cast_fp16 = mul(x = var_10782_cast_fp16, y = sin_5)[name = string("op_10783_cast_fp16")]; + tensor query_states_99_cast_fp16 = add(x = var_10758_cast_fp16, y = var_10783_cast_fp16)[name = string("query_states_99_cast_fp16")]; + tensor var_10786_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_5)[name = string("op_10786_cast_fp16")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; + fp16 const_525_promoted_to_fp16 = const()[name = string("const_525_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10807_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_525_promoted_to_fp16)[name = string("op_10807_cast_fp16")]; + int32 var_10809 = const()[name = string("op_10809"), val = int32(-1)]; + bool var_10810_interleave_0 = const()[name = string("op_10810_interleave_0"), val = bool(false)]; + tensor var_10810_cast_fp16 = concat(axis = var_10809, interleave = var_10810_interleave_0, values = (var_10807_cast_fp16, x1_51_cast_fp16))[name = string("op_10810_cast_fp16")]; + tensor var_10811_cast_fp16 = mul(x = var_10810_cast_fp16, y = sin_5)[name = string("op_10811_cast_fp16")]; + tensor key_states_123_cast_fp16 = add(x = var_10786_cast_fp16, y = var_10811_cast_fp16)[name = string("key_states_123_cast_fp16")]; + tensor key_slice_21_begin_0 = const()[name = string("key_slice_21_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor key_slice_21_end_0 = const()[name = string("key_slice_21_end_0"), val = tensor([11, 1, 512, 256])]; + tensor key_slice_21_end_mask_0 = const()[name = string("key_slice_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_21_cast_fp16 = slice_by_index(begin = key_slice_21_begin_0, end = key_slice_21_end_0, end_mask = key_slice_21_end_mask_0, x = coreml_update_state_73)[name = string("key_slice_21_cast_fp16")]; + tensor var_10848_begin_0 = const()[name = string("op_10848_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_10848_end_0 = const()[name = string("op_10848_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_10848_end_mask_0 = const()[name = string("op_10848_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10848_cast_fp16 = slice_by_index(begin = var_10848_begin_0, end = var_10848_end_0, end_mask = var_10848_end_mask_0, x = key_slice_21_cast_fp16)[name = string("op_10848_cast_fp16")]; + int32 var_10875 = const()[name = string("op_10875"), val = int32(2)]; + bool shifted_key_21_interleave_0 = const()[name = string("shifted_key_21_interleave_0"), val = bool(false)]; + tensor shifted_key_21_cast_fp16 = concat(axis = var_10875, interleave = shifted_key_21_interleave_0, values = (var_10848_cast_fp16, key_states_123_cast_fp16))[name = string("shifted_key_21_cast_fp16")]; + tensor concat_168 = const()[name = string("concat_168"), val = tensor([10, 0, 0, 0])]; + tensor concat_169 = const()[name = string("concat_169"), val = tensor([11, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_168, begin_mask = model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0, end = concat_169, end_mask = model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_21_stride_0, update = shifted_key_21_cast_fp16, x = coreml_update_state_73)[name = string("model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_180_write_state")]; + tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_180")]; + tensor value_slice_21_begin_0 = const()[name = string("value_slice_21_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor value_slice_21_end_0 = const()[name = string("value_slice_21_end_0"), val = tensor([33, 1, 512, 256])]; + tensor value_slice_21_end_mask_0 = const()[name = string("value_slice_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_21_cast_fp16 = slice_by_index(begin = value_slice_21_begin_0, end = value_slice_21_end_0, end_mask = value_slice_21_end_mask_0, x = coreml_update_state_76)[name = string("value_slice_21_cast_fp16")]; + tensor var_10918_begin_0 = const()[name = string("op_10918_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_10918_end_0 = const()[name = string("op_10918_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_10918_end_mask_0 = const()[name = string("op_10918_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10918_cast_fp16 = slice_by_index(begin = var_10918_begin_0, end = var_10918_end_0, end_mask = var_10918_end_mask_0, x = value_slice_21_cast_fp16)[name = string("op_10918_cast_fp16")]; + int32 var_10945 = const()[name = string("op_10945"), val = int32(2)]; + bool shifted_value_21_interleave_0 = const()[name = string("shifted_value_21_interleave_0"), val = bool(false)]; + tensor value_states_99 = transpose(perm = var_10690, x = var_10685)[name = string("transpose_122")]; + tensor shifted_value_21_cast_fp16 = concat(axis = var_10945, interleave = shifted_value_21_interleave_0, values = (var_10918_cast_fp16, value_states_99))[name = string("shifted_value_21_cast_fp16")]; + tensor concat_170 = const()[name = string("concat_170"), val = tensor([32, 0, 0, 0])]; + tensor concat_171 = const()[name = string("concat_171"), val = tensor([33, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_170, begin_mask = model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0, end = concat_171, end_mask = model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_22_stride_0, update = shifted_value_21_cast_fp16, x = coreml_update_state_76)[name = string("model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_181_write_state")]; + tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_181")]; + tensor var_10973_begin_0 = const()[name = string("op_10973_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor var_10973_end_0 = const()[name = string("op_10973_end_0"), val = tensor([11, 1, 512, 256])]; + tensor var_10973_end_mask_0 = const()[name = string("op_10973_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10973_cast_fp16 = slice_by_index(begin = var_10973_begin_0, end = var_10973_end_0, end_mask = var_10973_end_mask_0, x = coreml_update_state_77)[name = string("op_10973_cast_fp16")]; + tensor var_10980_begin_0 = const()[name = string("op_10980_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_10980_end_0 = const()[name = string("op_10980_end_0"), val = tensor([33, 1, 512, 256])]; + tensor var_10980_end_mask_0 = const()[name = string("op_10980_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10980_cast_fp16 = slice_by_index(begin = var_10980_begin_0, end = var_10980_end_0, end_mask = var_10980_end_mask_0, x = coreml_update_state_77)[name = string("op_10980_cast_fp16")]; + tensor var_11019 = const()[name = string("op_11019"), val = tensor([1, 4, 1, 1])]; + tensor x_197_cast_fp16 = tile(reps = var_11019, x = var_10973_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_11039 = const()[name = string("op_11039"), val = tensor([1, 4, 1, 1])]; + tensor x_203_cast_fp16 = tile(reps = var_11039, x = var_10980_cast_fp16)[name = string("x_203_cast_fp16")]; + bool var_11066_transpose_x_0 = const()[name = string("op_11066_transpose_x_0"), val = bool(false)]; + bool var_11066_transpose_y_0 = const()[name = string("op_11066_transpose_y_0"), val = bool(true)]; + tensor var_11066 = matmul(transpose_x = var_11066_transpose_x_0, transpose_y = var_11066_transpose_y_0, x = query_states_99_cast_fp16, y = x_197_cast_fp16)[name = string("op_11066")]; + fp16 var_11067_to_fp16 = const()[name = string("op_11067_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_49_cast_fp16 = mul(x = var_11066, y = var_11067_to_fp16)[name = string("attn_weights_49_cast_fp16")]; + tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = mask_slice_1)[name = string("attn_weights_51_cast_fp16")]; + int32 var_11102 = const()[name = string("op_11102"), val = int32(-1)]; + tensor var_11104_cast_fp16 = softmax(axis = var_11102, x = attn_weights_51_cast_fp16)[name = string("op_11104_cast_fp16")]; + tensor concat_176 = const()[name = string("concat_176"), val = tensor([4, 64, 512])]; + tensor reshape_36_cast_fp16 = reshape(shape = concat_176, x = var_11104_cast_fp16)[name = string("reshape_36_cast_fp16")]; + tensor concat_177 = const()[name = string("concat_177"), val = tensor([4, 512, 256])]; + tensor reshape_37_cast_fp16 = reshape(shape = concat_177, x = x_203_cast_fp16)[name = string("reshape_37_cast_fp16")]; + bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; + bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; + tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; + tensor concat_181 = const()[name = string("concat_181"), val = tensor([1, 4, 64, 256])]; + tensor reshape_38_cast_fp16 = reshape(shape = concat_181, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; + tensor var_11116_perm_0 = const()[name = string("op_11116_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11135 = const()[name = string("op_11135"), val = tensor([1, 64, 1024])]; + tensor var_11116_cast_fp16 = transpose(perm = var_11116_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_121")]; + tensor attn_output_125_cast_fp16 = reshape(shape = var_11135, x = var_11116_cast_fp16)[name = string("attn_output_125_cast_fp16")]; + tensor var_11140 = const()[name = string("op_11140"), val = tensor([0, 2, 1])]; + string var_11156_pad_type_0 = const()[name = string("op_11156_pad_type_0"), val = string("valid")]; + int32 var_11156_groups_0 = const()[name = string("op_11156_groups_0"), val = int32(1)]; + tensor var_11156_strides_0 = const()[name = string("op_11156_strides_0"), val = tensor([1])]; + tensor var_11156_pad_0 = const()[name = string("op_11156_pad_0"), val = tensor([0, 0])]; + tensor var_11156_dilations_0 = const()[name = string("op_11156_dilations_0"), val = tensor([1])]; + tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620052608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620937408))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11141_cast_fp16 = transpose(perm = var_11140, x = attn_output_125_cast_fp16)[name = string("transpose_120")]; + tensor var_11156_cast_fp16 = conv(dilations = var_11156_dilations_0, groups = var_11156_groups_0, pad = var_11156_pad_0, pad_type = var_11156_pad_type_0, strides = var_11156_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_11141_cast_fp16)[name = string("op_11156_cast_fp16")]; + tensor var_11160 = const()[name = string("op_11160"), val = tensor([0, 2, 1])]; + int32 var_11171 = const()[name = string("op_11171"), val = int32(-1)]; + fp16 const_536_promoted_to_fp16 = const()[name = string("const_536_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_205_cast_fp16 = transpose(perm = var_11160, x = var_11156_cast_fp16)[name = string("transpose_119")]; + tensor var_11173_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = const_536_promoted_to_fp16)[name = string("op_11173_cast_fp16")]; + bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; + tensor input_251_cast_fp16 = concat(axis = var_11171, interleave = input_251_interleave_0, values = (hidden_states_205_cast_fp16, var_11173_cast_fp16))[name = string("input_251_cast_fp16")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_11168_to_fp16 = const()[name = string("op_11168_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_11168_to_fp16, x = input_251_cast_fp16)[name = string("normed_301_cast_fp16")]; + tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; + tensor var_11187_to_fp16 = const()[name = string("op_11187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620974336)))]; + tensor attn_output_129_cast_fp16 = mul(x = normed_303_cast_fp16, y = var_11187_to_fp16)[name = string("attn_output_129_cast_fp16")]; + tensor hidden_states_207_cast_fp16 = add(x = hidden_states_197_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_207_cast_fp16")]; + int32 var_11200 = const()[name = string("op_11200"), val = int32(-1)]; + fp16 const_540_promoted_to_fp16 = const()[name = string("const_540_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11202_cast_fp16 = mul(x = hidden_states_207_cast_fp16, y = const_540_promoted_to_fp16)[name = string("op_11202_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_11200, interleave = input_253_interleave_0, values = (hidden_states_207_cast_fp16, var_11202_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_11197_to_fp16 = const()[name = string("op_11197_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_11197_to_fp16, x = input_253_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; + tensor var_11216_to_fp16 = const()[name = string("op_11216_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620976704)))]; + tensor x_205_cast_fp16 = mul(x = normed_307_cast_fp16, y = var_11216_to_fp16)[name = string("x_205_cast_fp16")]; + tensor var_11228 = const()[name = string("op_11228"), val = tensor([0, 2, 1])]; + tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; + tensor var_11229_cast_fp16 = transpose(perm = var_11228, x = x_205_cast_fp16)[name = string("transpose_118")]; + tensor input_255_cast_fp16 = expand_dims(axes = input_255_axes_0, x = var_11229_cast_fp16)[name = string("input_255_cast_fp16")]; + string x_207_pad_type_0 = const()[name = string("x_207_pad_type_0"), val = string("valid")]; + tensor x_207_strides_0 = const()[name = string("x_207_strides_0"), val = tensor([1, 1])]; + tensor x_207_pad_0 = const()[name = string("x_207_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_207_dilations_0 = const()[name = string("x_207_dilations_0"), val = tensor([1, 1])]; + int32 x_207_groups_0 = const()[name = string("x_207_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620979072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(626951104))))[name = string("model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_207_cast_fp16 = conv(dilations = x_207_dilations_0, groups = x_207_groups_0, pad = x_207_pad_0, pad_type = x_207_pad_type_0, strides = x_207_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("x_207_cast_fp16")]; + string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; + tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; + tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; + int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627172352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633144384))))[name = string("model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_25_cast_fp16 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("b_25_cast_fp16")]; + string var_11254_mode_0 = const()[name = string("op_11254_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_11254_cast_fp16 = gelu(mode = var_11254_mode_0, x = x_207_cast_fp16)[name = string("op_11254_cast_fp16")]; + tensor input_257_cast_fp16 = mul(x = var_11254_cast_fp16, y = b_25_cast_fp16)[name = string("input_257_cast_fp16")]; + string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; + tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; + tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; + int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; + tensor model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633365632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639337664))))[name = string("model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_25_cast_fp16 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_257_cast_fp16)[name = string("e_25_cast_fp16")]; + tensor var_11262_axes_0 = const()[name = string("op_11262_axes_0"), val = tensor([2])]; + tensor var_11262_cast_fp16 = squeeze(axes = var_11262_axes_0, x = e_25_cast_fp16)[name = string("op_11262_cast_fp16")]; + tensor var_11263 = const()[name = string("op_11263"), val = tensor([0, 2, 1])]; + int32 var_11274 = const()[name = string("op_11274"), val = int32(-1)]; + fp16 const_544_promoted_to_fp16 = const()[name = string("const_544_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_209_cast_fp16 = transpose(perm = var_11263, x = var_11262_cast_fp16)[name = string("transpose_117")]; + tensor var_11276_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_544_promoted_to_fp16)[name = string("op_11276_cast_fp16")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259_cast_fp16 = concat(axis = var_11274, interleave = input_259_interleave_0, values = (hidden_states_209_cast_fp16, var_11276_cast_fp16))[name = string("input_259_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_11271_to_fp16 = const()[name = string("op_11271_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_11271_to_fp16, x = input_259_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_311_cast_fp16 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311_cast_fp16")]; + tensor var_11290_to_fp16 = const()[name = string("op_11290_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639374592)))]; + tensor hidden_states_211_cast_fp16 = mul(x = normed_311_cast_fp16, y = var_11290_to_fp16)[name = string("hidden_states_211_cast_fp16")]; + tensor hidden_states_213_cast_fp16 = add(x = hidden_states_207_cast_fp16, y = hidden_states_211_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; + int32 var_11344 = const()[name = string("op_11344"), val = int32(-1)]; + fp16 const_549_promoted_to_fp16 = const()[name = string("const_549_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11346_cast_fp16 = mul(x = hidden_states_213_cast_fp16, y = const_549_promoted_to_fp16)[name = string("op_11346_cast_fp16")]; + bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; + tensor input_261_cast_fp16 = concat(axis = var_11344, interleave = input_261_interleave_0, values = (hidden_states_213_cast_fp16, var_11346_cast_fp16))[name = string("input_261_cast_fp16")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_11341_to_fp16 = const()[name = string("op_11341_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_11341_to_fp16, x = input_261_cast_fp16)[name = string("normed_313_cast_fp16")]; + tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_315_cast_fp16 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315_cast_fp16")]; + tensor var_11360_to_fp16 = const()[name = string("op_11360_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639376960)))]; + tensor hidden_states_215_cast_fp16 = mul(x = normed_315_cast_fp16, y = var_11360_to_fp16)[name = string("hidden_states_215_cast_fp16")]; + tensor var_11371 = const()[name = string("op_11371"), val = tensor([0, 2, 1])]; + tensor var_11374_axes_0 = const()[name = string("op_11374_axes_0"), val = tensor([2])]; + tensor var_11372_cast_fp16 = transpose(perm = var_11371, x = hidden_states_215_cast_fp16)[name = string("transpose_116")]; + tensor var_11374_cast_fp16 = expand_dims(axes = var_11374_axes_0, x = var_11372_cast_fp16)[name = string("op_11374_cast_fp16")]; + string query_states_105_pad_type_0 = const()[name = string("query_states_105_pad_type_0"), val = string("valid")]; + tensor query_states_105_strides_0 = const()[name = string("query_states_105_strides_0"), val = tensor([1, 1])]; + tensor query_states_105_pad_0 = const()[name = string("query_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_105_dilations_0 = const()[name = string("query_states_105_dilations_0"), val = tensor([1, 1])]; + int32 query_states_105_groups_0 = const()[name = string("query_states_105_groups_0"), val = int32(1)]; + tensor query_states_105 = conv(dilations = query_states_105_dilations_0, groups = query_states_105_groups_0, pad = query_states_105_pad_0, pad_type = query_states_105_pad_type_0, strides = query_states_105_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_11374_cast_fp16)[name = string("query_states_105")]; + string key_states_131_pad_type_0 = const()[name = string("key_states_131_pad_type_0"), val = string("valid")]; + tensor key_states_131_strides_0 = const()[name = string("key_states_131_strides_0"), val = tensor([1, 1])]; + tensor key_states_131_pad_0 = const()[name = string("key_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_131_dilations_0 = const()[name = string("key_states_131_dilations_0"), val = tensor([1, 1])]; + int32 key_states_131_groups_0 = const()[name = string("key_states_131_groups_0"), val = int32(1)]; + tensor key_states_131 = conv(dilations = key_states_131_dilations_0, groups = key_states_131_groups_0, pad = key_states_131_pad_0, pad_type = key_states_131_pad_type_0, strides = key_states_131_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_11374_cast_fp16)[name = string("key_states_131")]; + string value_states_105_pad_type_0 = const()[name = string("value_states_105_pad_type_0"), val = string("valid")]; + tensor value_states_105_strides_0 = const()[name = string("value_states_105_strides_0"), val = tensor([1, 1])]; + tensor value_states_105_pad_0 = const()[name = string("value_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_105_dilations_0 = const()[name = string("value_states_105_dilations_0"), val = tensor([1, 1])]; + int32 value_states_105_groups_0 = const()[name = string("value_states_105_groups_0"), val = int32(1)]; + tensor value_states_105 = conv(dilations = value_states_105_dilations_0, groups = value_states_105_groups_0, pad = value_states_105_pad_0, pad_type = value_states_105_pad_type_0, strides = value_states_105_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_11374_cast_fp16)[name = string("value_states_105")]; + tensor var_11416 = const()[name = string("op_11416"), val = tensor([1, 4, 256, 64])]; + tensor var_11417 = reshape(shape = var_11416, x = query_states_105)[name = string("op_11417")]; + tensor var_11422 = const()[name = string("op_11422"), val = tensor([0, 1, 3, 2])]; + tensor var_11427 = const()[name = string("op_11427"), val = tensor([1, 1, 256, 64])]; + tensor var_11428 = reshape(shape = var_11427, x = key_states_131)[name = string("op_11428")]; + tensor var_11433 = const()[name = string("op_11433"), val = tensor([0, 1, 3, 2])]; + tensor var_11438 = const()[name = string("op_11438"), val = tensor([1, 1, 256, 64])]; + tensor var_11439 = reshape(shape = var_11438, x = value_states_105)[name = string("op_11439")]; + tensor var_11444 = const()[name = string("op_11444"), val = tensor([0, 1, 3, 2])]; + int32 var_11455 = const()[name = string("op_11455"), val = int32(-1)]; + fp16 const_554_promoted = const()[name = string("const_554_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_217 = transpose(perm = var_11422, x = var_11417)[name = string("transpose_115")]; + tensor var_11457 = mul(x = hidden_states_217, y = const_554_promoted)[name = string("op_11457")]; + bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; + tensor input_265 = concat(axis = var_11455, interleave = input_265_interleave_0, values = (hidden_states_217, var_11457))[name = string("input_265")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_11452_to_fp16 = const()[name = string("op_11452_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_11452_to_fp16, x = input_265)[name = string("normed_317_cast_fp16")]; + tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_319 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319")]; + tensor var_11471_to_fp16 = const()[name = string("op_11471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639379328)))]; + tensor q_27_cast_fp16 = mul(x = normed_319, y = var_11471_to_fp16)[name = string("q_27_cast_fp16")]; + int32 var_11482 = const()[name = string("op_11482"), val = int32(-1)]; + fp16 const_558_promoted = const()[name = string("const_558_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_219 = transpose(perm = var_11433, x = var_11428)[name = string("transpose_114")]; + tensor var_11484 = mul(x = hidden_states_219, y = const_558_promoted)[name = string("op_11484")]; + bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; + tensor input_267 = concat(axis = var_11482, interleave = input_267_interleave_0, values = (hidden_states_219, var_11484))[name = string("input_267")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_11479_to_fp16 = const()[name = string("op_11479_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_11479_to_fp16, x = input_267)[name = string("normed_321_cast_fp16")]; + tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_323 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323")]; + tensor var_11498_to_fp16 = const()[name = string("op_11498_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639379904)))]; + tensor k_27_cast_fp16 = mul(x = normed_323, y = var_11498_to_fp16)[name = string("k_27_cast_fp16")]; + tensor var_11512_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_5)[name = string("op_11512_cast_fp16")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; + fp16 const_564_promoted_to_fp16 = const()[name = string("const_564_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11533_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_564_promoted_to_fp16)[name = string("op_11533_cast_fp16")]; + int32 var_11535 = const()[name = string("op_11535"), val = int32(-1)]; + bool var_11536_interleave_0 = const()[name = string("op_11536_interleave_0"), val = bool(false)]; + tensor var_11536_cast_fp16 = concat(axis = var_11535, interleave = var_11536_interleave_0, values = (var_11533_cast_fp16, x1_53_cast_fp16))[name = string("op_11536_cast_fp16")]; + tensor var_11537_cast_fp16 = mul(x = var_11536_cast_fp16, y = sin_5)[name = string("op_11537_cast_fp16")]; + tensor query_states_107_cast_fp16 = add(x = var_11512_cast_fp16, y = var_11537_cast_fp16)[name = string("query_states_107_cast_fp16")]; + tensor var_11540_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_5)[name = string("op_11540_cast_fp16")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; + fp16 const_567_promoted_to_fp16 = const()[name = string("const_567_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11561_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_567_promoted_to_fp16)[name = string("op_11561_cast_fp16")]; + int32 var_11563 = const()[name = string("op_11563"), val = int32(-1)]; + bool var_11564_interleave_0 = const()[name = string("op_11564_interleave_0"), val = bool(false)]; + tensor var_11564_cast_fp16 = concat(axis = var_11563, interleave = var_11564_interleave_0, values = (var_11561_cast_fp16, x1_55_cast_fp16))[name = string("op_11564_cast_fp16")]; + tensor var_11565_cast_fp16 = mul(x = var_11564_cast_fp16, y = sin_5)[name = string("op_11565_cast_fp16")]; + tensor key_states_133_cast_fp16 = add(x = var_11540_cast_fp16, y = var_11565_cast_fp16)[name = string("key_states_133_cast_fp16")]; + tensor key_slice_23_begin_0 = const()[name = string("key_slice_23_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor key_slice_23_end_0 = const()[name = string("key_slice_23_end_0"), val = tensor([12, 1, 512, 256])]; + tensor key_slice_23_end_mask_0 = const()[name = string("key_slice_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_23_cast_fp16 = slice_by_index(begin = key_slice_23_begin_0, end = key_slice_23_end_0, end_mask = key_slice_23_end_mask_0, x = coreml_update_state_77)[name = string("key_slice_23_cast_fp16")]; + tensor var_11602_begin_0 = const()[name = string("op_11602_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_11602_end_0 = const()[name = string("op_11602_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_11602_end_mask_0 = const()[name = string("op_11602_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_11602_cast_fp16 = slice_by_index(begin = var_11602_begin_0, end = var_11602_end_0, end_mask = var_11602_end_mask_0, x = key_slice_23_cast_fp16)[name = string("op_11602_cast_fp16")]; + int32 var_11629 = const()[name = string("op_11629"), val = int32(2)]; + bool shifted_key_23_interleave_0 = const()[name = string("shifted_key_23_interleave_0"), val = bool(false)]; + tensor shifted_key_23_cast_fp16 = concat(axis = var_11629, interleave = shifted_key_23_interleave_0, values = (var_11602_cast_fp16, key_states_133_cast_fp16))[name = string("shifted_key_23_cast_fp16")]; + tensor concat_182 = const()[name = string("concat_182"), val = tensor([11, 0, 0, 0])]; + tensor concat_183 = const()[name = string("concat_183"), val = tensor([12, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_23_stride_0, update = shifted_key_23_cast_fp16, x = coreml_update_state_77)[name = string("model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_182_write_state")]; + tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_182")]; + tensor value_slice_23_begin_0 = const()[name = string("value_slice_23_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor value_slice_23_end_0 = const()[name = string("value_slice_23_end_0"), val = tensor([34, 1, 512, 256])]; + tensor value_slice_23_end_mask_0 = const()[name = string("value_slice_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_23_cast_fp16 = slice_by_index(begin = value_slice_23_begin_0, end = value_slice_23_end_0, end_mask = value_slice_23_end_mask_0, x = coreml_update_state_78)[name = string("value_slice_23_cast_fp16")]; + tensor var_11672_begin_0 = const()[name = string("op_11672_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_11672_end_0 = const()[name = string("op_11672_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_11672_end_mask_0 = const()[name = string("op_11672_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_11672_cast_fp16 = slice_by_index(begin = var_11672_begin_0, end = var_11672_end_0, end_mask = var_11672_end_mask_0, x = value_slice_23_cast_fp16)[name = string("op_11672_cast_fp16")]; + int32 var_11699 = const()[name = string("op_11699"), val = int32(2)]; + bool shifted_value_23_interleave_0 = const()[name = string("shifted_value_23_interleave_0"), val = bool(false)]; + tensor value_states_107 = transpose(perm = var_11444, x = var_11439)[name = string("transpose_113")]; + tensor shifted_value_23_cast_fp16 = concat(axis = var_11699, interleave = shifted_value_23_interleave_0, values = (var_11672_cast_fp16, value_states_107))[name = string("shifted_value_23_cast_fp16")]; + tensor concat_184 = const()[name = string("concat_184"), val = tensor([33, 0, 0, 0])]; + tensor concat_185 = const()[name = string("concat_185"), val = tensor([34, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_184, begin_mask = model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0, end = concat_185, end_mask = model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_24_stride_0, update = shifted_value_23_cast_fp16, x = coreml_update_state_78)[name = string("model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_183_write_state")]; + tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_183")]; + tensor var_11727_begin_0 = const()[name = string("op_11727_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor var_11727_end_0 = const()[name = string("op_11727_end_0"), val = tensor([12, 1, 512, 256])]; + tensor var_11727_end_mask_0 = const()[name = string("op_11727_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11727_cast_fp16 = slice_by_index(begin = var_11727_begin_0, end = var_11727_end_0, end_mask = var_11727_end_mask_0, x = coreml_update_state_79)[name = string("op_11727_cast_fp16")]; + tensor var_11734_begin_0 = const()[name = string("op_11734_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_11734_end_0 = const()[name = string("op_11734_end_0"), val = tensor([34, 1, 512, 256])]; + tensor var_11734_end_mask_0 = const()[name = string("op_11734_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11734_cast_fp16 = slice_by_index(begin = var_11734_begin_0, end = var_11734_end_0, end_mask = var_11734_end_mask_0, x = coreml_update_state_79)[name = string("op_11734_cast_fp16")]; + tensor var_11773 = const()[name = string("op_11773"), val = tensor([1, 4, 1, 1])]; + tensor x_213_cast_fp16 = tile(reps = var_11773, x = var_11727_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_11793 = const()[name = string("op_11793"), val = tensor([1, 4, 1, 1])]; + tensor x_219_cast_fp16 = tile(reps = var_11793, x = var_11734_cast_fp16)[name = string("x_219_cast_fp16")]; + bool var_11820_transpose_x_0 = const()[name = string("op_11820_transpose_x_0"), val = bool(false)]; + bool var_11820_transpose_y_0 = const()[name = string("op_11820_transpose_y_0"), val = bool(true)]; + tensor var_11820 = matmul(transpose_x = var_11820_transpose_x_0, transpose_y = var_11820_transpose_y_0, x = query_states_107_cast_fp16, y = x_213_cast_fp16)[name = string("op_11820")]; + fp16 var_11821_to_fp16 = const()[name = string("op_11821_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_53_cast_fp16 = mul(x = var_11820, y = var_11821_to_fp16)[name = string("attn_weights_53_cast_fp16")]; + tensor attn_weights_55_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = mask_slice_1)[name = string("attn_weights_55_cast_fp16")]; + int32 var_11856 = const()[name = string("op_11856"), val = int32(-1)]; + tensor var_11858_cast_fp16 = softmax(axis = var_11856, x = attn_weights_55_cast_fp16)[name = string("op_11858_cast_fp16")]; + tensor concat_190 = const()[name = string("concat_190"), val = tensor([4, 64, 512])]; + tensor reshape_39_cast_fp16 = reshape(shape = concat_190, x = var_11858_cast_fp16)[name = string("reshape_39_cast_fp16")]; + tensor concat_191 = const()[name = string("concat_191"), val = tensor([4, 512, 256])]; + tensor reshape_40_cast_fp16 = reshape(shape = concat_191, x = x_219_cast_fp16)[name = string("reshape_40_cast_fp16")]; + bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; + bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; + tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; + tensor concat_195 = const()[name = string("concat_195"), val = tensor([1, 4, 64, 256])]; + tensor reshape_41_cast_fp16 = reshape(shape = concat_195, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; + tensor var_11870_perm_0 = const()[name = string("op_11870_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11889 = const()[name = string("op_11889"), val = tensor([1, 64, 1024])]; + tensor var_11870_cast_fp16 = transpose(perm = var_11870_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_112")]; + tensor attn_output_135_cast_fp16 = reshape(shape = var_11889, x = var_11870_cast_fp16)[name = string("attn_output_135_cast_fp16")]; + tensor var_11894 = const()[name = string("op_11894"), val = tensor([0, 2, 1])]; + string var_11910_pad_type_0 = const()[name = string("op_11910_pad_type_0"), val = string("valid")]; + int32 var_11910_groups_0 = const()[name = string("op_11910_groups_0"), val = int32(1)]; + tensor var_11910_strides_0 = const()[name = string("op_11910_strides_0"), val = tensor([1])]; + tensor var_11910_pad_0 = const()[name = string("op_11910_pad_0"), val = tensor([0, 0])]; + tensor var_11910_dilations_0 = const()[name = string("op_11910_dilations_0"), val = tensor([1])]; + tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639380480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640265280))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11895_cast_fp16 = transpose(perm = var_11894, x = attn_output_135_cast_fp16)[name = string("transpose_111")]; + tensor var_11910_cast_fp16 = conv(dilations = var_11910_dilations_0, groups = var_11910_groups_0, pad = var_11910_pad_0, pad_type = var_11910_pad_type_0, strides = var_11910_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_11895_cast_fp16)[name = string("op_11910_cast_fp16")]; + tensor var_11914 = const()[name = string("op_11914"), val = tensor([0, 2, 1])]; + int32 var_11925 = const()[name = string("op_11925"), val = int32(-1)]; + fp16 const_578_promoted_to_fp16 = const()[name = string("const_578_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_221_cast_fp16 = transpose(perm = var_11914, x = var_11910_cast_fp16)[name = string("transpose_110")]; + tensor var_11927_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_578_promoted_to_fp16)[name = string("op_11927_cast_fp16")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271_cast_fp16 = concat(axis = var_11925, interleave = input_271_interleave_0, values = (hidden_states_221_cast_fp16, var_11927_cast_fp16))[name = string("input_271_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_11922_to_fp16 = const()[name = string("op_11922_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_11922_to_fp16, x = input_271_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_327_cast_fp16 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327_cast_fp16")]; + tensor var_11941_to_fp16 = const()[name = string("op_11941_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640302208)))]; + tensor attn_output_139_cast_fp16 = mul(x = normed_327_cast_fp16, y = var_11941_to_fp16)[name = string("attn_output_139_cast_fp16")]; + tensor hidden_states_223_cast_fp16 = add(x = hidden_states_213_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_223_cast_fp16")]; + int32 var_11954 = const()[name = string("op_11954"), val = int32(-1)]; + fp16 const_582_promoted_to_fp16 = const()[name = string("const_582_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11956_cast_fp16 = mul(x = hidden_states_223_cast_fp16, y = const_582_promoted_to_fp16)[name = string("op_11956_cast_fp16")]; + bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; + tensor input_273_cast_fp16 = concat(axis = var_11954, interleave = input_273_interleave_0, values = (hidden_states_223_cast_fp16, var_11956_cast_fp16))[name = string("input_273_cast_fp16")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_11951_to_fp16 = const()[name = string("op_11951_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_11951_to_fp16, x = input_273_cast_fp16)[name = string("normed_329_cast_fp16")]; + tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_331_cast_fp16 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331_cast_fp16")]; + tensor var_11970_to_fp16 = const()[name = string("op_11970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640304576)))]; + tensor x_221_cast_fp16 = mul(x = normed_331_cast_fp16, y = var_11970_to_fp16)[name = string("x_221_cast_fp16")]; + tensor var_11982 = const()[name = string("op_11982"), val = tensor([0, 2, 1])]; + tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; + tensor var_11983_cast_fp16 = transpose(perm = var_11982, x = x_221_cast_fp16)[name = string("transpose_109")]; + tensor input_275_cast_fp16 = expand_dims(axes = input_275_axes_0, x = var_11983_cast_fp16)[name = string("input_275_cast_fp16")]; + string x_223_pad_type_0 = const()[name = string("x_223_pad_type_0"), val = string("valid")]; + tensor x_223_strides_0 = const()[name = string("x_223_strides_0"), val = tensor([1, 1])]; + tensor x_223_pad_0 = const()[name = string("x_223_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_223_dilations_0 = const()[name = string("x_223_dilations_0"), val = tensor([1, 1])]; + int32 x_223_groups_0 = const()[name = string("x_223_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640306944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646278976))))[name = string("model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_223_cast_fp16 = conv(dilations = x_223_dilations_0, groups = x_223_groups_0, pad = x_223_pad_0, pad_type = x_223_pad_type_0, strides = x_223_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("x_223_cast_fp16")]; + string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; + tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; + tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; + int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646500224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652472256))))[name = string("model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_27_cast_fp16 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("b_27_cast_fp16")]; + string var_12008_mode_0 = const()[name = string("op_12008_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_12008_cast_fp16 = gelu(mode = var_12008_mode_0, x = x_223_cast_fp16)[name = string("op_12008_cast_fp16")]; + tensor input_277_cast_fp16 = mul(x = var_12008_cast_fp16, y = b_27_cast_fp16)[name = string("input_277_cast_fp16")]; + string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; + tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; + tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; + int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; + tensor model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652693504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658665536))))[name = string("model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_27_cast_fp16 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_277_cast_fp16)[name = string("e_27_cast_fp16")]; + tensor var_12016_axes_0 = const()[name = string("op_12016_axes_0"), val = tensor([2])]; + tensor var_12016_cast_fp16 = squeeze(axes = var_12016_axes_0, x = e_27_cast_fp16)[name = string("op_12016_cast_fp16")]; + tensor var_12017 = const()[name = string("op_12017"), val = tensor([0, 2, 1])]; + int32 var_12028 = const()[name = string("op_12028"), val = int32(-1)]; + fp16 const_586_promoted_to_fp16 = const()[name = string("const_586_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_225_cast_fp16 = transpose(perm = var_12017, x = var_12016_cast_fp16)[name = string("transpose_108")]; + tensor var_12030_cast_fp16 = mul(x = hidden_states_225_cast_fp16, y = const_586_promoted_to_fp16)[name = string("op_12030_cast_fp16")]; + bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; + tensor input_279_cast_fp16 = concat(axis = var_12028, interleave = input_279_interleave_0, values = (hidden_states_225_cast_fp16, var_12030_cast_fp16))[name = string("input_279_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_12025_to_fp16 = const()[name = string("op_12025_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_12025_to_fp16, x = input_279_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; + tensor var_12044_to_fp16 = const()[name = string("op_12044_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658702464)))]; + tensor hidden_states_227_cast_fp16 = mul(x = normed_335_cast_fp16, y = var_12044_to_fp16)[name = string("hidden_states_227_cast_fp16")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_223_cast_fp16, y = hidden_states_227_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + int32 var_12098 = const()[name = string("op_12098"), val = int32(-1)]; + fp16 const_591_promoted_to_fp16 = const()[name = string("const_591_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12100_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = const_591_promoted_to_fp16)[name = string("op_12100_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_12098, interleave = input_281_interleave_0, values = (hidden_states_229_cast_fp16, var_12100_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_12095_to_fp16 = const()[name = string("op_12095_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_12095_to_fp16, x = input_281_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; + tensor var_12114_to_fp16 = const()[name = string("op_12114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658704832)))]; + tensor hidden_states_231_cast_fp16 = mul(x = normed_339_cast_fp16, y = var_12114_to_fp16)[name = string("hidden_states_231_cast_fp16")]; + tensor var_12125 = const()[name = string("op_12125"), val = tensor([0, 2, 1])]; + tensor var_12128_axes_0 = const()[name = string("op_12128_axes_0"), val = tensor([2])]; + tensor var_12126_cast_fp16 = transpose(perm = var_12125, x = hidden_states_231_cast_fp16)[name = string("transpose_107")]; + tensor var_12128_cast_fp16 = expand_dims(axes = var_12128_axes_0, x = var_12126_cast_fp16)[name = string("op_12128_cast_fp16")]; + string query_states_113_pad_type_0 = const()[name = string("query_states_113_pad_type_0"), val = string("valid")]; + tensor query_states_113_strides_0 = const()[name = string("query_states_113_strides_0"), val = tensor([1, 1])]; + tensor query_states_113_pad_0 = const()[name = string("query_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_113_dilations_0 = const()[name = string("query_states_113_dilations_0"), val = tensor([1, 1])]; + int32 query_states_113_groups_0 = const()[name = string("query_states_113_groups_0"), val = int32(1)]; + tensor query_states_113 = conv(dilations = query_states_113_dilations_0, groups = query_states_113_groups_0, pad = query_states_113_pad_0, pad_type = query_states_113_pad_type_0, strides = query_states_113_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_12128_cast_fp16)[name = string("query_states_113")]; + string key_states_141_pad_type_0 = const()[name = string("key_states_141_pad_type_0"), val = string("valid")]; + tensor key_states_141_strides_0 = const()[name = string("key_states_141_strides_0"), val = tensor([1, 1])]; + tensor key_states_141_pad_0 = const()[name = string("key_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_141_dilations_0 = const()[name = string("key_states_141_dilations_0"), val = tensor([1, 1])]; + int32 key_states_141_groups_0 = const()[name = string("key_states_141_groups_0"), val = int32(1)]; + tensor key_states_141 = conv(dilations = key_states_141_dilations_0, groups = key_states_141_groups_0, pad = key_states_141_pad_0, pad_type = key_states_141_pad_type_0, strides = key_states_141_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_12128_cast_fp16)[name = string("key_states_141")]; + string value_states_113_pad_type_0 = const()[name = string("value_states_113_pad_type_0"), val = string("valid")]; + tensor value_states_113_strides_0 = const()[name = string("value_states_113_strides_0"), val = tensor([1, 1])]; + tensor value_states_113_pad_0 = const()[name = string("value_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_113_dilations_0 = const()[name = string("value_states_113_dilations_0"), val = tensor([1, 1])]; + int32 value_states_113_groups_0 = const()[name = string("value_states_113_groups_0"), val = int32(1)]; + tensor value_states_113 = conv(dilations = value_states_113_dilations_0, groups = value_states_113_groups_0, pad = value_states_113_pad_0, pad_type = value_states_113_pad_type_0, strides = value_states_113_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_12128_cast_fp16)[name = string("value_states_113")]; + tensor var_12170 = const()[name = string("op_12170"), val = tensor([1, 4, 256, 64])]; + tensor var_12171 = reshape(shape = var_12170, x = query_states_113)[name = string("op_12171")]; + tensor var_12176 = const()[name = string("op_12176"), val = tensor([0, 1, 3, 2])]; + tensor var_12181 = const()[name = string("op_12181"), val = tensor([1, 1, 256, 64])]; + tensor var_12182 = reshape(shape = var_12181, x = key_states_141)[name = string("op_12182")]; + tensor var_12187 = const()[name = string("op_12187"), val = tensor([0, 1, 3, 2])]; + tensor var_12192 = const()[name = string("op_12192"), val = tensor([1, 1, 256, 64])]; + tensor var_12193 = reshape(shape = var_12192, x = value_states_113)[name = string("op_12193")]; + tensor var_12198 = const()[name = string("op_12198"), val = tensor([0, 1, 3, 2])]; + int32 var_12209 = const()[name = string("op_12209"), val = int32(-1)]; + fp16 const_596_promoted = const()[name = string("const_596_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_233 = transpose(perm = var_12176, x = var_12171)[name = string("transpose_106")]; + tensor var_12211 = mul(x = hidden_states_233, y = const_596_promoted)[name = string("op_12211")]; + bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; + tensor input_285 = concat(axis = var_12209, interleave = input_285_interleave_0, values = (hidden_states_233, var_12211))[name = string("input_285")]; + tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; + fp16 var_12206_to_fp16 = const()[name = string("op_12206_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_12206_to_fp16, x = input_285)[name = string("normed_341_cast_fp16")]; + tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; + tensor var_12225_to_fp16 = const()[name = string("op_12225_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658707200)))]; + tensor q_29_cast_fp16 = mul(x = normed_343, y = var_12225_to_fp16)[name = string("q_29_cast_fp16")]; + int32 var_12236 = const()[name = string("op_12236"), val = int32(-1)]; + fp16 const_600_promoted = const()[name = string("const_600_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_235 = transpose(perm = var_12187, x = var_12182)[name = string("transpose_105")]; + tensor var_12238 = mul(x = hidden_states_235, y = const_600_promoted)[name = string("op_12238")]; + bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; + tensor input_287 = concat(axis = var_12236, interleave = input_287_interleave_0, values = (hidden_states_235, var_12238))[name = string("input_287")]; + tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; + fp16 var_12233_to_fp16 = const()[name = string("op_12233_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_12233_to_fp16, x = input_287)[name = string("normed_345_cast_fp16")]; + tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; + tensor var_12252_to_fp16 = const()[name = string("op_12252_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658707776)))]; + tensor k_29_cast_fp16 = mul(x = normed_347, y = var_12252_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_12266_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_5)[name = string("op_12266_cast_fp16")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; + fp16 const_606_promoted_to_fp16 = const()[name = string("const_606_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12287_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_606_promoted_to_fp16)[name = string("op_12287_cast_fp16")]; + int32 var_12289 = const()[name = string("op_12289"), val = int32(-1)]; + bool var_12290_interleave_0 = const()[name = string("op_12290_interleave_0"), val = bool(false)]; + tensor var_12290_cast_fp16 = concat(axis = var_12289, interleave = var_12290_interleave_0, values = (var_12287_cast_fp16, x1_57_cast_fp16))[name = string("op_12290_cast_fp16")]; + tensor var_12291_cast_fp16 = mul(x = var_12290_cast_fp16, y = sin_5)[name = string("op_12291_cast_fp16")]; + tensor query_states_115_cast_fp16 = add(x = var_12266_cast_fp16, y = var_12291_cast_fp16)[name = string("query_states_115_cast_fp16")]; + tensor var_12294_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_5)[name = string("op_12294_cast_fp16")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; + fp16 const_609_promoted_to_fp16 = const()[name = string("const_609_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12315_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_609_promoted_to_fp16)[name = string("op_12315_cast_fp16")]; + int32 var_12317 = const()[name = string("op_12317"), val = int32(-1)]; + bool var_12318_interleave_0 = const()[name = string("op_12318_interleave_0"), val = bool(false)]; + tensor var_12318_cast_fp16 = concat(axis = var_12317, interleave = var_12318_interleave_0, values = (var_12315_cast_fp16, x1_59_cast_fp16))[name = string("op_12318_cast_fp16")]; + tensor var_12319_cast_fp16 = mul(x = var_12318_cast_fp16, y = sin_5)[name = string("op_12319_cast_fp16")]; + tensor key_states_143_cast_fp16 = add(x = var_12294_cast_fp16, y = var_12319_cast_fp16)[name = string("key_states_143_cast_fp16")]; + tensor key_slice_25_begin_0 = const()[name = string("key_slice_25_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor key_slice_25_end_0 = const()[name = string("key_slice_25_end_0"), val = tensor([13, 1, 512, 256])]; + tensor key_slice_25_end_mask_0 = const()[name = string("key_slice_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_25_cast_fp16 = slice_by_index(begin = key_slice_25_begin_0, end = key_slice_25_end_0, end_mask = key_slice_25_end_mask_0, x = coreml_update_state_79)[name = string("key_slice_25_cast_fp16")]; + tensor var_12356_begin_0 = const()[name = string("op_12356_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_12356_end_0 = const()[name = string("op_12356_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_12356_end_mask_0 = const()[name = string("op_12356_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_12356_cast_fp16 = slice_by_index(begin = var_12356_begin_0, end = var_12356_end_0, end_mask = var_12356_end_mask_0, x = key_slice_25_cast_fp16)[name = string("op_12356_cast_fp16")]; + int32 var_12383 = const()[name = string("op_12383"), val = int32(2)]; + bool shifted_key_25_interleave_0 = const()[name = string("shifted_key_25_interleave_0"), val = bool(false)]; + tensor shifted_key_25_cast_fp16 = concat(axis = var_12383, interleave = shifted_key_25_interleave_0, values = (var_12356_cast_fp16, key_states_143_cast_fp16))[name = string("shifted_key_25_cast_fp16")]; + tensor concat_196 = const()[name = string("concat_196"), val = tensor([12, 0, 0, 0])]; + tensor concat_197 = const()[name = string("concat_197"), val = tensor([13, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_196, begin_mask = model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0, end = concat_197, end_mask = model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_25_stride_0, update = shifted_key_25_cast_fp16, x = coreml_update_state_79)[name = string("model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_184_write_state")]; + tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_184")]; + tensor value_slice_25_begin_0 = const()[name = string("value_slice_25_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor value_slice_25_end_0 = const()[name = string("value_slice_25_end_0"), val = tensor([35, 1, 512, 256])]; + tensor value_slice_25_end_mask_0 = const()[name = string("value_slice_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_25_cast_fp16 = slice_by_index(begin = value_slice_25_begin_0, end = value_slice_25_end_0, end_mask = value_slice_25_end_mask_0, x = coreml_update_state_80)[name = string("value_slice_25_cast_fp16")]; + tensor var_12426_begin_0 = const()[name = string("op_12426_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_12426_end_0 = const()[name = string("op_12426_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_12426_end_mask_0 = const()[name = string("op_12426_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_12426_cast_fp16 = slice_by_index(begin = var_12426_begin_0, end = var_12426_end_0, end_mask = var_12426_end_mask_0, x = value_slice_25_cast_fp16)[name = string("op_12426_cast_fp16")]; + int32 var_12453 = const()[name = string("op_12453"), val = int32(2)]; + bool shifted_value_25_interleave_0 = const()[name = string("shifted_value_25_interleave_0"), val = bool(false)]; + tensor value_states_115 = transpose(perm = var_12198, x = var_12193)[name = string("transpose_104")]; + tensor shifted_value_25_cast_fp16 = concat(axis = var_12453, interleave = shifted_value_25_interleave_0, values = (var_12426_cast_fp16, value_states_115))[name = string("shifted_value_25_cast_fp16")]; + tensor concat_198 = const()[name = string("concat_198"), val = tensor([34, 0, 0, 0])]; + tensor concat_199 = const()[name = string("concat_199"), val = tensor([35, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_198, begin_mask = model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0, end = concat_199, end_mask = model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_26_stride_0, update = shifted_value_25_cast_fp16, x = coreml_update_state_80)[name = string("model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_185_write_state")]; + tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_185")]; + tensor var_12481_begin_0 = const()[name = string("op_12481_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor var_12481_end_0 = const()[name = string("op_12481_end_0"), val = tensor([13, 1, 512, 256])]; + tensor var_12481_end_mask_0 = const()[name = string("op_12481_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12481_cast_fp16 = slice_by_index(begin = var_12481_begin_0, end = var_12481_end_0, end_mask = var_12481_end_mask_0, x = coreml_update_state_81)[name = string("op_12481_cast_fp16")]; + tensor var_12488_begin_0 = const()[name = string("op_12488_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_12488_end_0 = const()[name = string("op_12488_end_0"), val = tensor([35, 1, 512, 256])]; + tensor var_12488_end_mask_0 = const()[name = string("op_12488_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12488_cast_fp16 = slice_by_index(begin = var_12488_begin_0, end = var_12488_end_0, end_mask = var_12488_end_mask_0, x = coreml_update_state_81)[name = string("op_12488_cast_fp16")]; + tensor var_12527 = const()[name = string("op_12527"), val = tensor([1, 4, 1, 1])]; + tensor x_229_cast_fp16 = tile(reps = var_12527, x = var_12481_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_12547 = const()[name = string("op_12547"), val = tensor([1, 4, 1, 1])]; + tensor x_235_cast_fp16 = tile(reps = var_12547, x = var_12488_cast_fp16)[name = string("x_235_cast_fp16")]; + bool var_12574_transpose_x_0 = const()[name = string("op_12574_transpose_x_0"), val = bool(false)]; + bool var_12574_transpose_y_0 = const()[name = string("op_12574_transpose_y_0"), val = bool(true)]; + tensor var_12574 = matmul(transpose_x = var_12574_transpose_x_0, transpose_y = var_12574_transpose_y_0, x = query_states_115_cast_fp16, y = x_229_cast_fp16)[name = string("op_12574")]; + fp16 var_12575_to_fp16 = const()[name = string("op_12575_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_57_cast_fp16 = mul(x = var_12574, y = var_12575_to_fp16)[name = string("attn_weights_57_cast_fp16")]; + tensor attn_weights_59_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = mask_slice_1)[name = string("attn_weights_59_cast_fp16")]; + int32 var_12610 = const()[name = string("op_12610"), val = int32(-1)]; + tensor var_12612_cast_fp16 = softmax(axis = var_12610, x = attn_weights_59_cast_fp16)[name = string("op_12612_cast_fp16")]; + tensor concat_204 = const()[name = string("concat_204"), val = tensor([4, 64, 512])]; + tensor reshape_42_cast_fp16 = reshape(shape = concat_204, x = var_12612_cast_fp16)[name = string("reshape_42_cast_fp16")]; + tensor concat_205 = const()[name = string("concat_205"), val = tensor([4, 512, 256])]; + tensor reshape_43_cast_fp16 = reshape(shape = concat_205, x = x_235_cast_fp16)[name = string("reshape_43_cast_fp16")]; + bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; + bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; + tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; + tensor concat_209 = const()[name = string("concat_209"), val = tensor([1, 4, 64, 256])]; + tensor reshape_44_cast_fp16 = reshape(shape = concat_209, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; + tensor var_12624_perm_0 = const()[name = string("op_12624_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12643 = const()[name = string("op_12643"), val = tensor([1, 64, 1024])]; + tensor var_12624_cast_fp16 = transpose(perm = var_12624_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_103")]; + tensor attn_output_145_cast_fp16 = reshape(shape = var_12643, x = var_12624_cast_fp16)[name = string("attn_output_145_cast_fp16")]; + tensor var_12648 = const()[name = string("op_12648"), val = tensor([0, 2, 1])]; + string var_12664_pad_type_0 = const()[name = string("op_12664_pad_type_0"), val = string("valid")]; + int32 var_12664_groups_0 = const()[name = string("op_12664_groups_0"), val = int32(1)]; + tensor var_12664_strides_0 = const()[name = string("op_12664_strides_0"), val = tensor([1])]; + tensor var_12664_pad_0 = const()[name = string("op_12664_pad_0"), val = tensor([0, 0])]; + tensor var_12664_dilations_0 = const()[name = string("op_12664_dilations_0"), val = tensor([1])]; + tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658708352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659593152))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12649_cast_fp16 = transpose(perm = var_12648, x = attn_output_145_cast_fp16)[name = string("transpose_102")]; + tensor var_12664_cast_fp16 = conv(dilations = var_12664_dilations_0, groups = var_12664_groups_0, pad = var_12664_pad_0, pad_type = var_12664_pad_type_0, strides = var_12664_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_12649_cast_fp16)[name = string("op_12664_cast_fp16")]; + tensor var_12668 = const()[name = string("op_12668"), val = tensor([0, 2, 1])]; + int32 var_12679 = const()[name = string("op_12679"), val = int32(-1)]; + fp16 const_620_promoted_to_fp16 = const()[name = string("const_620_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_237_cast_fp16 = transpose(perm = var_12668, x = var_12664_cast_fp16)[name = string("transpose_101")]; + tensor var_12681_cast_fp16 = mul(x = hidden_states_237_cast_fp16, y = const_620_promoted_to_fp16)[name = string("op_12681_cast_fp16")]; + bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; + tensor input_291_cast_fp16 = concat(axis = var_12679, interleave = input_291_interleave_0, values = (hidden_states_237_cast_fp16, var_12681_cast_fp16))[name = string("input_291_cast_fp16")]; + tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; + fp16 var_12676_to_fp16 = const()[name = string("op_12676_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_12676_to_fp16, x = input_291_cast_fp16)[name = string("normed_349_cast_fp16")]; + tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; + tensor var_12695_to_fp16 = const()[name = string("op_12695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659630080)))]; + tensor attn_output_149_cast_fp16 = mul(x = normed_351_cast_fp16, y = var_12695_to_fp16)[name = string("attn_output_149_cast_fp16")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + int32 var_12708 = const()[name = string("op_12708"), val = int32(-1)]; + fp16 const_624_promoted_to_fp16 = const()[name = string("const_624_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12710_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = const_624_promoted_to_fp16)[name = string("op_12710_cast_fp16")]; + bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; + tensor input_293_cast_fp16 = concat(axis = var_12708, interleave = input_293_interleave_0, values = (hidden_states_239_cast_fp16, var_12710_cast_fp16))[name = string("input_293_cast_fp16")]; + tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; + fp16 var_12705_to_fp16 = const()[name = string("op_12705_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_12705_to_fp16, x = input_293_cast_fp16)[name = string("normed_353_cast_fp16")]; + tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; + tensor var_12724_to_fp16 = const()[name = string("op_12724_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659632448)))]; + tensor x_237_cast_fp16 = mul(x = normed_355_cast_fp16, y = var_12724_to_fp16)[name = string("x_237_cast_fp16")]; + tensor var_12736 = const()[name = string("op_12736"), val = tensor([0, 2, 1])]; + tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; + tensor var_12737_cast_fp16 = transpose(perm = var_12736, x = x_237_cast_fp16)[name = string("transpose_100")]; + tensor input_295_cast_fp16 = expand_dims(axes = input_295_axes_0, x = var_12737_cast_fp16)[name = string("input_295_cast_fp16")]; + string x_239_pad_type_0 = const()[name = string("x_239_pad_type_0"), val = string("valid")]; + tensor x_239_strides_0 = const()[name = string("x_239_strides_0"), val = tensor([1, 1])]; + tensor x_239_pad_0 = const()[name = string("x_239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_239_dilations_0 = const()[name = string("x_239_dilations_0"), val = tensor([1, 1])]; + int32 x_239_groups_0 = const()[name = string("x_239_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659634816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665606848))))[name = string("model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_239_cast_fp16 = conv(dilations = x_239_dilations_0, groups = x_239_groups_0, pad = x_239_pad_0, pad_type = x_239_pad_type_0, strides = x_239_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("x_239_cast_fp16")]; + string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; + tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; + tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; + int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665828096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671800128))))[name = string("model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_29_cast_fp16 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("b_29_cast_fp16")]; + string var_12762_mode_0 = const()[name = string("op_12762_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_12762_cast_fp16 = gelu(mode = var_12762_mode_0, x = x_239_cast_fp16)[name = string("op_12762_cast_fp16")]; + tensor input_297_cast_fp16 = mul(x = var_12762_cast_fp16, y = b_29_cast_fp16)[name = string("input_297_cast_fp16")]; + string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; + tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; + tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; + int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; + tensor model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(672021376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677993408))))[name = string("model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_29_cast_fp16 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("e_29_cast_fp16")]; + tensor var_12770_axes_0 = const()[name = string("op_12770_axes_0"), val = tensor([2])]; + tensor var_12770_cast_fp16 = squeeze(axes = var_12770_axes_0, x = e_29_cast_fp16)[name = string("op_12770_cast_fp16")]; + tensor var_12771 = const()[name = string("op_12771"), val = tensor([0, 2, 1])]; + int32 var_12782 = const()[name = string("op_12782"), val = int32(-1)]; + fp16 const_628_promoted_to_fp16 = const()[name = string("const_628_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_241_cast_fp16 = transpose(perm = var_12771, x = var_12770_cast_fp16)[name = string("transpose_99")]; + tensor var_12784_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_628_promoted_to_fp16)[name = string("op_12784_cast_fp16")]; + bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; + tensor input_299_cast_fp16 = concat(axis = var_12782, interleave = input_299_interleave_0, values = (hidden_states_241_cast_fp16, var_12784_cast_fp16))[name = string("input_299_cast_fp16")]; + tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; + fp16 var_12779_to_fp16 = const()[name = string("op_12779_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_12779_to_fp16, x = input_299_cast_fp16)[name = string("normed_357_cast_fp16")]; + tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_359_cast_fp16 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359_cast_fp16")]; + tensor var_12798_to_fp16 = const()[name = string("op_12798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678030336)))]; + tensor hidden_states_243_cast_fp16 = mul(x = normed_359_cast_fp16, y = var_12798_to_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor hidden_states_245_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = hidden_states_243_cast_fp16)[name = string("hidden_states_245_cast_fp16")]; + int32 var_12852 = const()[name = string("op_12852"), val = int32(-1)]; + fp16 const_633_promoted_to_fp16 = const()[name = string("const_633_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12854_cast_fp16 = mul(x = hidden_states_245_cast_fp16, y = const_633_promoted_to_fp16)[name = string("op_12854_cast_fp16")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301_cast_fp16 = concat(axis = var_12852, interleave = input_301_interleave_0, values = (hidden_states_245_cast_fp16, var_12854_cast_fp16))[name = string("input_301_cast_fp16")]; + tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; + fp16 var_12849_to_fp16 = const()[name = string("op_12849_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_12849_to_fp16, x = input_301_cast_fp16)[name = string("normed_361_cast_fp16")]; + tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_363_cast_fp16 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363_cast_fp16")]; + tensor var_12868_to_fp16 = const()[name = string("op_12868_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678032704)))]; + tensor hidden_states_247_cast_fp16 = mul(x = normed_363_cast_fp16, y = var_12868_to_fp16)[name = string("hidden_states_247_cast_fp16")]; + tensor var_12879 = const()[name = string("op_12879"), val = tensor([0, 2, 1])]; + tensor var_12882_axes_0 = const()[name = string("op_12882_axes_0"), val = tensor([2])]; + tensor var_12880_cast_fp16 = transpose(perm = var_12879, x = hidden_states_247_cast_fp16)[name = string("transpose_98")]; + tensor var_12882_cast_fp16 = expand_dims(axes = var_12882_axes_0, x = var_12880_cast_fp16)[name = string("op_12882_cast_fp16")]; + string query_states_121_pad_type_0 = const()[name = string("query_states_121_pad_type_0"), val = string("valid")]; + tensor query_states_121_strides_0 = const()[name = string("query_states_121_strides_0"), val = tensor([1, 1])]; + tensor query_states_121_pad_0 = const()[name = string("query_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_121_dilations_0 = const()[name = string("query_states_121_dilations_0"), val = tensor([1, 1])]; + int32 query_states_121_groups_0 = const()[name = string("query_states_121_groups_0"), val = int32(1)]; + tensor query_states_121 = conv(dilations = query_states_121_dilations_0, groups = query_states_121_groups_0, pad = query_states_121_pad_0, pad_type = query_states_121_pad_type_0, strides = query_states_121_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_12882_cast_fp16)[name = string("query_states_121")]; + string key_states_151_pad_type_0 = const()[name = string("key_states_151_pad_type_0"), val = string("valid")]; + tensor key_states_151_strides_0 = const()[name = string("key_states_151_strides_0"), val = tensor([1, 1])]; + tensor key_states_151_pad_0 = const()[name = string("key_states_151_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_151_dilations_0 = const()[name = string("key_states_151_dilations_0"), val = tensor([1, 1])]; + int32 key_states_151_groups_0 = const()[name = string("key_states_151_groups_0"), val = int32(1)]; + tensor key_states_151 = conv(dilations = key_states_151_dilations_0, groups = key_states_151_groups_0, pad = key_states_151_pad_0, pad_type = key_states_151_pad_type_0, strides = key_states_151_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_12882_cast_fp16)[name = string("key_states_151")]; + string value_states_121_pad_type_0 = const()[name = string("value_states_121_pad_type_0"), val = string("valid")]; + tensor value_states_121_strides_0 = const()[name = string("value_states_121_strides_0"), val = tensor([1, 1])]; + tensor value_states_121_pad_0 = const()[name = string("value_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_121_dilations_0 = const()[name = string("value_states_121_dilations_0"), val = tensor([1, 1])]; + int32 value_states_121_groups_0 = const()[name = string("value_states_121_groups_0"), val = int32(1)]; + tensor value_states_121 = conv(dilations = value_states_121_dilations_0, groups = value_states_121_groups_0, pad = value_states_121_pad_0, pad_type = value_states_121_pad_type_0, strides = value_states_121_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_12882_cast_fp16)[name = string("value_states_121")]; + tensor var_12924 = const()[name = string("op_12924"), val = tensor([1, 4, 256, 64])]; + tensor var_12925 = reshape(shape = var_12924, x = query_states_121)[name = string("op_12925")]; + tensor var_12930 = const()[name = string("op_12930"), val = tensor([0, 1, 3, 2])]; + tensor var_12935 = const()[name = string("op_12935"), val = tensor([1, 1, 256, 64])]; + tensor var_12936 = reshape(shape = var_12935, x = key_states_151)[name = string("op_12936")]; + tensor var_12941 = const()[name = string("op_12941"), val = tensor([0, 1, 3, 2])]; + tensor var_12946 = const()[name = string("op_12946"), val = tensor([1, 1, 256, 64])]; + tensor var_12947 = reshape(shape = var_12946, x = value_states_121)[name = string("op_12947")]; + tensor var_12952 = const()[name = string("op_12952"), val = tensor([0, 1, 3, 2])]; + int32 var_12963 = const()[name = string("op_12963"), val = int32(-1)]; + fp16 const_638_promoted = const()[name = string("const_638_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_249 = transpose(perm = var_12930, x = var_12925)[name = string("transpose_97")]; + tensor var_12965 = mul(x = hidden_states_249, y = const_638_promoted)[name = string("op_12965")]; + bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; + tensor input_305 = concat(axis = var_12963, interleave = input_305_interleave_0, values = (hidden_states_249, var_12965))[name = string("input_305")]; + tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; + fp16 var_12960_to_fp16 = const()[name = string("op_12960_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_12960_to_fp16, x = input_305)[name = string("normed_365_cast_fp16")]; + tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_367 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367")]; + tensor var_12979_to_fp16 = const()[name = string("op_12979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678035072)))]; + tensor q_31_cast_fp16 = mul(x = normed_367, y = var_12979_to_fp16)[name = string("q_31_cast_fp16")]; + int32 var_12990 = const()[name = string("op_12990"), val = int32(-1)]; + fp16 const_642_promoted = const()[name = string("const_642_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_251 = transpose(perm = var_12941, x = var_12936)[name = string("transpose_96")]; + tensor var_12992 = mul(x = hidden_states_251, y = const_642_promoted)[name = string("op_12992")]; + bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; + tensor input_307 = concat(axis = var_12990, interleave = input_307_interleave_0, values = (hidden_states_251, var_12992))[name = string("input_307")]; + tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; + fp16 var_12987_to_fp16 = const()[name = string("op_12987_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_12987_to_fp16, x = input_307)[name = string("normed_369_cast_fp16")]; + tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_371 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371")]; + tensor var_13006_to_fp16 = const()[name = string("op_13006_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678035648)))]; + tensor k_31_cast_fp16 = mul(x = normed_371, y = var_13006_to_fp16)[name = string("k_31_cast_fp16")]; + tensor var_13020_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_5)[name = string("op_13020_cast_fp16")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; + fp16 const_648_promoted_to_fp16 = const()[name = string("const_648_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13041_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_648_promoted_to_fp16)[name = string("op_13041_cast_fp16")]; + int32 var_13043 = const()[name = string("op_13043"), val = int32(-1)]; + bool var_13044_interleave_0 = const()[name = string("op_13044_interleave_0"), val = bool(false)]; + tensor var_13044_cast_fp16 = concat(axis = var_13043, interleave = var_13044_interleave_0, values = (var_13041_cast_fp16, x1_61_cast_fp16))[name = string("op_13044_cast_fp16")]; + tensor var_13045_cast_fp16 = mul(x = var_13044_cast_fp16, y = sin_5)[name = string("op_13045_cast_fp16")]; + tensor query_states_123_cast_fp16 = add(x = var_13020_cast_fp16, y = var_13045_cast_fp16)[name = string("query_states_123_cast_fp16")]; + tensor var_13048_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_5)[name = string("op_13048_cast_fp16")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; + fp16 const_651_promoted_to_fp16 = const()[name = string("const_651_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13069_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_651_promoted_to_fp16)[name = string("op_13069_cast_fp16")]; + int32 var_13071 = const()[name = string("op_13071"), val = int32(-1)]; + bool var_13072_interleave_0 = const()[name = string("op_13072_interleave_0"), val = bool(false)]; + tensor var_13072_cast_fp16 = concat(axis = var_13071, interleave = var_13072_interleave_0, values = (var_13069_cast_fp16, x1_63_cast_fp16))[name = string("op_13072_cast_fp16")]; + tensor var_13073_cast_fp16 = mul(x = var_13072_cast_fp16, y = sin_5)[name = string("op_13073_cast_fp16")]; + tensor key_states_153_cast_fp16 = add(x = var_13048_cast_fp16, y = var_13073_cast_fp16)[name = string("key_states_153_cast_fp16")]; + tensor key_slice_27_begin_0 = const()[name = string("key_slice_27_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor key_slice_27_end_0 = const()[name = string("key_slice_27_end_0"), val = tensor([14, 1, 512, 256])]; + tensor key_slice_27_end_mask_0 = const()[name = string("key_slice_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_27_cast_fp16 = slice_by_index(begin = key_slice_27_begin_0, end = key_slice_27_end_0, end_mask = key_slice_27_end_mask_0, x = coreml_update_state_81)[name = string("key_slice_27_cast_fp16")]; + tensor var_13110_begin_0 = const()[name = string("op_13110_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_13110_end_0 = const()[name = string("op_13110_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_13110_end_mask_0 = const()[name = string("op_13110_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_13110_cast_fp16 = slice_by_index(begin = var_13110_begin_0, end = var_13110_end_0, end_mask = var_13110_end_mask_0, x = key_slice_27_cast_fp16)[name = string("op_13110_cast_fp16")]; + int32 var_13137 = const()[name = string("op_13137"), val = int32(2)]; + bool shifted_key_27_interleave_0 = const()[name = string("shifted_key_27_interleave_0"), val = bool(false)]; + tensor shifted_key_27_cast_fp16 = concat(axis = var_13137, interleave = shifted_key_27_interleave_0, values = (var_13110_cast_fp16, key_states_153_cast_fp16))[name = string("shifted_key_27_cast_fp16")]; + tensor concat_210 = const()[name = string("concat_210"), val = tensor([13, 0, 0, 0])]; + tensor concat_211 = const()[name = string("concat_211"), val = tensor([14, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_210, begin_mask = model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0, end = concat_211, end_mask = model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_27_stride_0, update = shifted_key_27_cast_fp16, x = coreml_update_state_81)[name = string("model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_186_write_state")]; + tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_186")]; + tensor value_slice_27_begin_0 = const()[name = string("value_slice_27_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor value_slice_27_end_0 = const()[name = string("value_slice_27_end_0"), val = tensor([36, 1, 512, 256])]; + tensor value_slice_27_end_mask_0 = const()[name = string("value_slice_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_27_cast_fp16 = slice_by_index(begin = value_slice_27_begin_0, end = value_slice_27_end_0, end_mask = value_slice_27_end_mask_0, x = coreml_update_state_82)[name = string("value_slice_27_cast_fp16")]; + tensor var_13180_begin_0 = const()[name = string("op_13180_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_13180_end_0 = const()[name = string("op_13180_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_13180_end_mask_0 = const()[name = string("op_13180_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_13180_cast_fp16 = slice_by_index(begin = var_13180_begin_0, end = var_13180_end_0, end_mask = var_13180_end_mask_0, x = value_slice_27_cast_fp16)[name = string("op_13180_cast_fp16")]; + int32 var_13207 = const()[name = string("op_13207"), val = int32(2)]; + bool shifted_value_27_interleave_0 = const()[name = string("shifted_value_27_interleave_0"), val = bool(false)]; + tensor value_states_123 = transpose(perm = var_12952, x = var_12947)[name = string("transpose_95")]; + tensor shifted_value_27_cast_fp16 = concat(axis = var_13207, interleave = shifted_value_27_interleave_0, values = (var_13180_cast_fp16, value_states_123))[name = string("shifted_value_27_cast_fp16")]; + tensor concat_212 = const()[name = string("concat_212"), val = tensor([35, 0, 0, 0])]; + tensor concat_213 = const()[name = string("concat_213"), val = tensor([36, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_212, begin_mask = model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0, end = concat_213, end_mask = model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_28_stride_0, update = shifted_value_27_cast_fp16, x = coreml_update_state_82)[name = string("model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_187_write_state")]; + tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_187")]; + tensor var_13235_begin_0 = const()[name = string("op_13235_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor var_13235_end_0 = const()[name = string("op_13235_end_0"), val = tensor([14, 1, 512, 256])]; + tensor var_13235_end_mask_0 = const()[name = string("op_13235_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13235_cast_fp16 = slice_by_index(begin = var_13235_begin_0, end = var_13235_end_0, end_mask = var_13235_end_mask_0, x = coreml_update_state_83)[name = string("op_13235_cast_fp16")]; + tensor var_13242_begin_0 = const()[name = string("op_13242_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_13242_end_0 = const()[name = string("op_13242_end_0"), val = tensor([36, 1, 512, 256])]; + tensor var_13242_end_mask_0 = const()[name = string("op_13242_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13242_cast_fp16 = slice_by_index(begin = var_13242_begin_0, end = var_13242_end_0, end_mask = var_13242_end_mask_0, x = coreml_update_state_83)[name = string("op_13242_cast_fp16")]; + tensor var_13281 = const()[name = string("op_13281"), val = tensor([1, 4, 1, 1])]; + tensor x_245_cast_fp16 = tile(reps = var_13281, x = var_13235_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_13301 = const()[name = string("op_13301"), val = tensor([1, 4, 1, 1])]; + tensor x_251_cast_fp16 = tile(reps = var_13301, x = var_13242_cast_fp16)[name = string("x_251_cast_fp16")]; + bool var_13328_transpose_x_0 = const()[name = string("op_13328_transpose_x_0"), val = bool(false)]; + bool var_13328_transpose_y_0 = const()[name = string("op_13328_transpose_y_0"), val = bool(true)]; + tensor var_13328 = matmul(transpose_x = var_13328_transpose_x_0, transpose_y = var_13328_transpose_y_0, x = query_states_123_cast_fp16, y = x_245_cast_fp16)[name = string("op_13328")]; + fp16 var_13329_to_fp16 = const()[name = string("op_13329_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_61_cast_fp16 = mul(x = var_13328, y = var_13329_to_fp16)[name = string("attn_weights_61_cast_fp16")]; + tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = mask_slice_1)[name = string("attn_weights_63_cast_fp16")]; + int32 var_13364 = const()[name = string("op_13364"), val = int32(-1)]; + tensor var_13366_cast_fp16 = softmax(axis = var_13364, x = attn_weights_63_cast_fp16)[name = string("op_13366_cast_fp16")]; + tensor concat_218 = const()[name = string("concat_218"), val = tensor([4, 64, 512])]; + tensor reshape_45_cast_fp16 = reshape(shape = concat_218, x = var_13366_cast_fp16)[name = string("reshape_45_cast_fp16")]; + tensor concat_219 = const()[name = string("concat_219"), val = tensor([4, 512, 256])]; + tensor reshape_46_cast_fp16 = reshape(shape = concat_219, x = x_251_cast_fp16)[name = string("reshape_46_cast_fp16")]; + bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; + bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; + tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; + tensor concat_223 = const()[name = string("concat_223"), val = tensor([1, 4, 64, 256])]; + tensor reshape_47_cast_fp16 = reshape(shape = concat_223, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; + tensor var_13378_perm_0 = const()[name = string("op_13378_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13397 = const()[name = string("op_13397"), val = tensor([1, 64, 1024])]; + tensor var_13378_cast_fp16 = transpose(perm = var_13378_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_94")]; + tensor attn_output_155_cast_fp16 = reshape(shape = var_13397, x = var_13378_cast_fp16)[name = string("attn_output_155_cast_fp16")]; + tensor var_13402 = const()[name = string("op_13402"), val = tensor([0, 2, 1])]; + string var_13418_pad_type_0 = const()[name = string("op_13418_pad_type_0"), val = string("valid")]; + int32 var_13418_groups_0 = const()[name = string("op_13418_groups_0"), val = int32(1)]; + tensor var_13418_strides_0 = const()[name = string("op_13418_strides_0"), val = tensor([1])]; + tensor var_13418_pad_0 = const()[name = string("op_13418_pad_0"), val = tensor([0, 0])]; + tensor var_13418_dilations_0 = const()[name = string("op_13418_dilations_0"), val = tensor([1])]; + tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678036224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678921024))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13403_cast_fp16 = transpose(perm = var_13402, x = attn_output_155_cast_fp16)[name = string("transpose_93")]; + tensor var_13418_cast_fp16 = conv(dilations = var_13418_dilations_0, groups = var_13418_groups_0, pad = var_13418_pad_0, pad_type = var_13418_pad_type_0, strides = var_13418_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_13403_cast_fp16)[name = string("op_13418_cast_fp16")]; + tensor var_13422 = const()[name = string("op_13422"), val = tensor([0, 2, 1])]; + int32 var_13433 = const()[name = string("op_13433"), val = int32(-1)]; + fp16 const_662_promoted_to_fp16 = const()[name = string("const_662_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_253_cast_fp16 = transpose(perm = var_13422, x = var_13418_cast_fp16)[name = string("transpose_92")]; + tensor var_13435_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = const_662_promoted_to_fp16)[name = string("op_13435_cast_fp16")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311_cast_fp16 = concat(axis = var_13433, interleave = input_311_interleave_0, values = (hidden_states_253_cast_fp16, var_13435_cast_fp16))[name = string("input_311_cast_fp16")]; + tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; + fp16 var_13430_to_fp16 = const()[name = string("op_13430_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_13430_to_fp16, x = input_311_cast_fp16)[name = string("normed_373_cast_fp16")]; + tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_375_cast_fp16 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375_cast_fp16")]; + tensor var_13449_to_fp16 = const()[name = string("op_13449_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678957952)))]; + tensor attn_output_159_cast_fp16 = mul(x = normed_375_cast_fp16, y = var_13449_to_fp16)[name = string("attn_output_159_cast_fp16")]; + tensor hidden_states_255_cast_fp16 = add(x = hidden_states_245_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; + int32 var_13462 = const()[name = string("op_13462"), val = int32(-1)]; + fp16 const_666_promoted_to_fp16 = const()[name = string("const_666_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13464_cast_fp16 = mul(x = hidden_states_255_cast_fp16, y = const_666_promoted_to_fp16)[name = string("op_13464_cast_fp16")]; + bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; + tensor input_313_cast_fp16 = concat(axis = var_13462, interleave = input_313_interleave_0, values = (hidden_states_255_cast_fp16, var_13464_cast_fp16))[name = string("input_313_cast_fp16")]; + tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; + fp16 var_13459_to_fp16 = const()[name = string("op_13459_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_13459_to_fp16, x = input_313_cast_fp16)[name = string("normed_377_cast_fp16")]; + tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_379_cast_fp16 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379_cast_fp16")]; + tensor var_13478_to_fp16 = const()[name = string("op_13478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678960320)))]; + tensor x_253_cast_fp16 = mul(x = normed_379_cast_fp16, y = var_13478_to_fp16)[name = string("x_253_cast_fp16")]; + tensor var_13490 = const()[name = string("op_13490"), val = tensor([0, 2, 1])]; + tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; + tensor var_13491_cast_fp16 = transpose(perm = var_13490, x = x_253_cast_fp16)[name = string("transpose_91")]; + tensor input_315_cast_fp16 = expand_dims(axes = input_315_axes_0, x = var_13491_cast_fp16)[name = string("input_315_cast_fp16")]; + string x_255_pad_type_0 = const()[name = string("x_255_pad_type_0"), val = string("valid")]; + tensor x_255_strides_0 = const()[name = string("x_255_strides_0"), val = tensor([1, 1])]; + tensor x_255_pad_0 = const()[name = string("x_255_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_255_dilations_0 = const()[name = string("x_255_dilations_0"), val = tensor([1, 1])]; + int32 x_255_groups_0 = const()[name = string("x_255_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678962688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(684934720))))[name = string("model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_255_cast_fp16 = conv(dilations = x_255_dilations_0, groups = x_255_groups_0, pad = x_255_pad_0, pad_type = x_255_pad_type_0, strides = x_255_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("x_255_cast_fp16")]; + string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; + tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; + tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; + int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685155968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(691128000))))[name = string("model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_31_cast_fp16 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("b_31_cast_fp16")]; + string var_13516_mode_0 = const()[name = string("op_13516_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_13516_cast_fp16 = gelu(mode = var_13516_mode_0, x = x_255_cast_fp16)[name = string("op_13516_cast_fp16")]; + tensor input_317_cast_fp16 = mul(x = var_13516_cast_fp16, y = b_31_cast_fp16)[name = string("input_317_cast_fp16")]; + string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; + tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; + tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; + int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; + tensor model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(691349248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697321280))))[name = string("model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_31_cast_fp16 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_317_cast_fp16)[name = string("e_31_cast_fp16")]; + tensor var_13524_axes_0 = const()[name = string("op_13524_axes_0"), val = tensor([2])]; + tensor var_13524_cast_fp16 = squeeze(axes = var_13524_axes_0, x = e_31_cast_fp16)[name = string("op_13524_cast_fp16")]; + tensor var_13525 = const()[name = string("op_13525"), val = tensor([0, 2, 1])]; + int32 var_13536 = const()[name = string("op_13536"), val = int32(-1)]; + fp16 const_670_promoted_to_fp16 = const()[name = string("const_670_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_257_cast_fp16 = transpose(perm = var_13525, x = var_13524_cast_fp16)[name = string("transpose_90")]; + tensor var_13538_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = const_670_promoted_to_fp16)[name = string("op_13538_cast_fp16")]; + bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; + tensor input_319_cast_fp16 = concat(axis = var_13536, interleave = input_319_interleave_0, values = (hidden_states_257_cast_fp16, var_13538_cast_fp16))[name = string("input_319_cast_fp16")]; + tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; + fp16 var_13533_to_fp16 = const()[name = string("op_13533_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_13533_to_fp16, x = input_319_cast_fp16)[name = string("normed_381_cast_fp16")]; + tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; + tensor var_13552_to_fp16 = const()[name = string("op_13552_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697358208)))]; + tensor hidden_states_259_cast_fp16 = mul(x = normed_383_cast_fp16, y = var_13552_to_fp16)[name = string("hidden_states_259_cast_fp16")]; + tensor hidden_states_261_cast_fp16 = add(x = hidden_states_255_cast_fp16, y = hidden_states_259_cast_fp16)[name = string("hidden_states_261_cast_fp16")]; + int32 var_13606 = const()[name = string("op_13606"), val = int32(-1)]; + fp16 const_675_promoted_to_fp16 = const()[name = string("const_675_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13608_cast_fp16 = mul(x = hidden_states_261_cast_fp16, y = const_675_promoted_to_fp16)[name = string("op_13608_cast_fp16")]; + bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; + tensor input_321_cast_fp16 = concat(axis = var_13606, interleave = input_321_interleave_0, values = (hidden_states_261_cast_fp16, var_13608_cast_fp16))[name = string("input_321_cast_fp16")]; + tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; + fp16 var_13603_to_fp16 = const()[name = string("op_13603_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_13603_to_fp16, x = input_321_cast_fp16)[name = string("normed_385_cast_fp16")]; + tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; + tensor var_13622_to_fp16 = const()[name = string("op_13622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697360576)))]; + tensor hidden_states_263_cast_fp16 = mul(x = normed_387_cast_fp16, y = var_13622_to_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor var_13633 = const()[name = string("op_13633"), val = tensor([0, 2, 1])]; + tensor var_13636_axes_0 = const()[name = string("op_13636_axes_0"), val = tensor([2])]; + tensor var_13634_cast_fp16 = transpose(perm = var_13633, x = hidden_states_263_cast_fp16)[name = string("transpose_89")]; + tensor var_13636_cast_fp16 = expand_dims(axes = var_13636_axes_0, x = var_13634_cast_fp16)[name = string("op_13636_cast_fp16")]; + string query_states_129_pad_type_0 = const()[name = string("query_states_129_pad_type_0"), val = string("valid")]; + tensor query_states_129_strides_0 = const()[name = string("query_states_129_strides_0"), val = tensor([1, 1])]; + tensor query_states_129_pad_0 = const()[name = string("query_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_129_dilations_0 = const()[name = string("query_states_129_dilations_0"), val = tensor([1, 1])]; + int32 query_states_129_groups_0 = const()[name = string("query_states_129_groups_0"), val = int32(1)]; + tensor query_states_129 = conv(dilations = query_states_129_dilations_0, groups = query_states_129_groups_0, pad = query_states_129_pad_0, pad_type = query_states_129_pad_type_0, strides = query_states_129_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_13636_cast_fp16)[name = string("query_states_129")]; + string key_states_161_pad_type_0 = const()[name = string("key_states_161_pad_type_0"), val = string("valid")]; + tensor key_states_161_strides_0 = const()[name = string("key_states_161_strides_0"), val = tensor([1, 1])]; + tensor key_states_161_pad_0 = const()[name = string("key_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_161_dilations_0 = const()[name = string("key_states_161_dilations_0"), val = tensor([1, 1])]; + int32 key_states_161_groups_0 = const()[name = string("key_states_161_groups_0"), val = int32(1)]; + tensor key_states_161 = conv(dilations = key_states_161_dilations_0, groups = key_states_161_groups_0, pad = key_states_161_pad_0, pad_type = key_states_161_pad_type_0, strides = key_states_161_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_13636_cast_fp16)[name = string("key_states_161")]; + string value_states_129_pad_type_0 = const()[name = string("value_states_129_pad_type_0"), val = string("valid")]; + tensor value_states_129_strides_0 = const()[name = string("value_states_129_strides_0"), val = tensor([1, 1])]; + tensor value_states_129_pad_0 = const()[name = string("value_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_129_dilations_0 = const()[name = string("value_states_129_dilations_0"), val = tensor([1, 1])]; + int32 value_states_129_groups_0 = const()[name = string("value_states_129_groups_0"), val = int32(1)]; + tensor value_states_129 = conv(dilations = value_states_129_dilations_0, groups = value_states_129_groups_0, pad = value_states_129_pad_0, pad_type = value_states_129_pad_type_0, strides = value_states_129_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_13636_cast_fp16)[name = string("value_states_129")]; + tensor var_13678 = const()[name = string("op_13678"), val = tensor([1, 4, 256, 64])]; + tensor var_13679 = reshape(shape = var_13678, x = query_states_129)[name = string("op_13679")]; + tensor var_13684 = const()[name = string("op_13684"), val = tensor([0, 1, 3, 2])]; + tensor var_13689 = const()[name = string("op_13689"), val = tensor([1, 1, 256, 64])]; + tensor var_13690 = reshape(shape = var_13689, x = key_states_161)[name = string("op_13690")]; + tensor var_13695 = const()[name = string("op_13695"), val = tensor([0, 1, 3, 2])]; + tensor var_13700 = const()[name = string("op_13700"), val = tensor([1, 1, 256, 64])]; + tensor var_13701 = reshape(shape = var_13700, x = value_states_129)[name = string("op_13701")]; + tensor var_13706 = const()[name = string("op_13706"), val = tensor([0, 1, 3, 2])]; + int32 var_13717 = const()[name = string("op_13717"), val = int32(-1)]; + fp16 const_680_promoted = const()[name = string("const_680_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_265 = transpose(perm = var_13684, x = var_13679)[name = string("transpose_88")]; + tensor var_13719 = mul(x = hidden_states_265, y = const_680_promoted)[name = string("op_13719")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325 = concat(axis = var_13717, interleave = input_325_interleave_0, values = (hidden_states_265, var_13719))[name = string("input_325")]; + tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; + fp16 var_13714_to_fp16 = const()[name = string("op_13714_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_13714_to_fp16, x = input_325)[name = string("normed_389_cast_fp16")]; + tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; + tensor var_13733_to_fp16 = const()[name = string("op_13733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697362944)))]; + tensor q_33_cast_fp16 = mul(x = normed_391, y = var_13733_to_fp16)[name = string("q_33_cast_fp16")]; + int32 var_13744 = const()[name = string("op_13744"), val = int32(-1)]; + fp16 const_684_promoted = const()[name = string("const_684_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_267 = transpose(perm = var_13695, x = var_13690)[name = string("transpose_87")]; + tensor var_13746 = mul(x = hidden_states_267, y = const_684_promoted)[name = string("op_13746")]; + bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; + tensor input_327 = concat(axis = var_13744, interleave = input_327_interleave_0, values = (hidden_states_267, var_13746))[name = string("input_327")]; + tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; + fp16 var_13741_to_fp16 = const()[name = string("op_13741_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_13741_to_fp16, x = input_327)[name = string("normed_393_cast_fp16")]; + tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; + tensor var_13760_to_fp16 = const()[name = string("op_13760_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697363520)))]; + tensor k_33_cast_fp16 = mul(x = normed_395, y = var_13760_to_fp16)[name = string("k_33_cast_fp16")]; + tensor var_13774_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_5)[name = string("op_13774_cast_fp16")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; + fp16 const_690_promoted_to_fp16 = const()[name = string("const_690_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13795_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_690_promoted_to_fp16)[name = string("op_13795_cast_fp16")]; + int32 var_13797 = const()[name = string("op_13797"), val = int32(-1)]; + bool var_13798_interleave_0 = const()[name = string("op_13798_interleave_0"), val = bool(false)]; + tensor var_13798_cast_fp16 = concat(axis = var_13797, interleave = var_13798_interleave_0, values = (var_13795_cast_fp16, x1_65_cast_fp16))[name = string("op_13798_cast_fp16")]; + tensor var_13799_cast_fp16 = mul(x = var_13798_cast_fp16, y = sin_5)[name = string("op_13799_cast_fp16")]; + tensor query_states_131_cast_fp16 = add(x = var_13774_cast_fp16, y = var_13799_cast_fp16)[name = string("query_states_131_cast_fp16")]; + tensor var_13802_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_5)[name = string("op_13802_cast_fp16")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; + fp16 const_693_promoted_to_fp16 = const()[name = string("const_693_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13823_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_693_promoted_to_fp16)[name = string("op_13823_cast_fp16")]; + int32 var_13825 = const()[name = string("op_13825"), val = int32(-1)]; + bool var_13826_interleave_0 = const()[name = string("op_13826_interleave_0"), val = bool(false)]; + tensor var_13826_cast_fp16 = concat(axis = var_13825, interleave = var_13826_interleave_0, values = (var_13823_cast_fp16, x1_67_cast_fp16))[name = string("op_13826_cast_fp16")]; + tensor var_13827_cast_fp16 = mul(x = var_13826_cast_fp16, y = sin_5)[name = string("op_13827_cast_fp16")]; + tensor key_states_163_cast_fp16 = add(x = var_13802_cast_fp16, y = var_13827_cast_fp16)[name = string("key_states_163_cast_fp16")]; + tensor key_slice_29_begin_0 = const()[name = string("key_slice_29_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor key_slice_29_end_0 = const()[name = string("key_slice_29_end_0"), val = tensor([15, 1, 512, 256])]; + tensor key_slice_29_end_mask_0 = const()[name = string("key_slice_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_29_cast_fp16 = slice_by_index(begin = key_slice_29_begin_0, end = key_slice_29_end_0, end_mask = key_slice_29_end_mask_0, x = coreml_update_state_83)[name = string("key_slice_29_cast_fp16")]; + tensor var_13864_begin_0 = const()[name = string("op_13864_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_13864_end_0 = const()[name = string("op_13864_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_13864_end_mask_0 = const()[name = string("op_13864_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_13864_cast_fp16 = slice_by_index(begin = var_13864_begin_0, end = var_13864_end_0, end_mask = var_13864_end_mask_0, x = key_slice_29_cast_fp16)[name = string("op_13864_cast_fp16")]; + int32 var_13891 = const()[name = string("op_13891"), val = int32(2)]; + bool shifted_key_29_interleave_0 = const()[name = string("shifted_key_29_interleave_0"), val = bool(false)]; + tensor shifted_key_29_cast_fp16 = concat(axis = var_13891, interleave = shifted_key_29_interleave_0, values = (var_13864_cast_fp16, key_states_163_cast_fp16))[name = string("shifted_key_29_cast_fp16")]; + tensor concat_224 = const()[name = string("concat_224"), val = tensor([14, 0, 0, 0])]; + tensor concat_225 = const()[name = string("concat_225"), val = tensor([15, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_224, begin_mask = model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0, end = concat_225, end_mask = model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_29_stride_0, update = shifted_key_29_cast_fp16, x = coreml_update_state_83)[name = string("model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_188_write_state")]; + tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_188")]; + tensor value_slice_29_begin_0 = const()[name = string("value_slice_29_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor value_slice_29_end_0 = const()[name = string("value_slice_29_end_0"), val = tensor([37, 1, 512, 256])]; + tensor value_slice_29_end_mask_0 = const()[name = string("value_slice_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_29_cast_fp16 = slice_by_index(begin = value_slice_29_begin_0, end = value_slice_29_end_0, end_mask = value_slice_29_end_mask_0, x = coreml_update_state_84)[name = string("value_slice_29_cast_fp16")]; + tensor var_13934_begin_0 = const()[name = string("op_13934_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_13934_end_0 = const()[name = string("op_13934_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_13934_end_mask_0 = const()[name = string("op_13934_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_13934_cast_fp16 = slice_by_index(begin = var_13934_begin_0, end = var_13934_end_0, end_mask = var_13934_end_mask_0, x = value_slice_29_cast_fp16)[name = string("op_13934_cast_fp16")]; + int32 var_13961 = const()[name = string("op_13961"), val = int32(2)]; + bool shifted_value_29_interleave_0 = const()[name = string("shifted_value_29_interleave_0"), val = bool(false)]; + tensor value_states_131 = transpose(perm = var_13706, x = var_13701)[name = string("transpose_86")]; + tensor shifted_value_29_cast_fp16 = concat(axis = var_13961, interleave = shifted_value_29_interleave_0, values = (var_13934_cast_fp16, value_states_131))[name = string("shifted_value_29_cast_fp16")]; + tensor concat_226 = const()[name = string("concat_226"), val = tensor([36, 0, 0, 0])]; + tensor concat_227 = const()[name = string("concat_227"), val = tensor([37, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_226, begin_mask = model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0, end = concat_227, end_mask = model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_30_stride_0, update = shifted_value_29_cast_fp16, x = coreml_update_state_84)[name = string("model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_189_write_state")]; + tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_189")]; + tensor var_13989_begin_0 = const()[name = string("op_13989_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor var_13989_end_0 = const()[name = string("op_13989_end_0"), val = tensor([15, 1, 512, 256])]; + tensor var_13989_end_mask_0 = const()[name = string("op_13989_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13989_cast_fp16 = slice_by_index(begin = var_13989_begin_0, end = var_13989_end_0, end_mask = var_13989_end_mask_0, x = coreml_update_state_85)[name = string("op_13989_cast_fp16")]; + tensor var_13996_begin_0 = const()[name = string("op_13996_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor var_13996_end_0 = const()[name = string("op_13996_end_0"), val = tensor([37, 1, 512, 256])]; + tensor var_13996_end_mask_0 = const()[name = string("op_13996_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13996_cast_fp16 = slice_by_index(begin = var_13996_begin_0, end = var_13996_end_0, end_mask = var_13996_end_mask_0, x = coreml_update_state_85)[name = string("op_13996_cast_fp16")]; + tensor var_14035 = const()[name = string("op_14035"), val = tensor([1, 4, 1, 1])]; + tensor x_261_cast_fp16 = tile(reps = var_14035, x = var_13989_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_14055 = const()[name = string("op_14055"), val = tensor([1, 4, 1, 1])]; + tensor x_267_cast_fp16 = tile(reps = var_14055, x = var_13996_cast_fp16)[name = string("x_267_cast_fp16")]; + bool var_14082_transpose_x_0 = const()[name = string("op_14082_transpose_x_0"), val = bool(false)]; + bool var_14082_transpose_y_0 = const()[name = string("op_14082_transpose_y_0"), val = bool(true)]; + tensor var_14082 = matmul(transpose_x = var_14082_transpose_x_0, transpose_y = var_14082_transpose_y_0, x = query_states_131_cast_fp16, y = x_261_cast_fp16)[name = string("op_14082")]; + fp16 var_14083_to_fp16 = const()[name = string("op_14083_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_65_cast_fp16 = mul(x = var_14082, y = var_14083_to_fp16)[name = string("attn_weights_65_cast_fp16")]; + tensor attn_weights_67_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = mask_slice_1)[name = string("attn_weights_67_cast_fp16")]; + int32 var_14118 = const()[name = string("op_14118"), val = int32(-1)]; + tensor var_14120_cast_fp16 = softmax(axis = var_14118, x = attn_weights_67_cast_fp16)[name = string("op_14120_cast_fp16")]; + tensor concat_232 = const()[name = string("concat_232"), val = tensor([4, 64, 512])]; + tensor reshape_48_cast_fp16 = reshape(shape = concat_232, x = var_14120_cast_fp16)[name = string("reshape_48_cast_fp16")]; + tensor concat_233 = const()[name = string("concat_233"), val = tensor([4, 512, 256])]; + tensor reshape_49_cast_fp16 = reshape(shape = concat_233, x = x_267_cast_fp16)[name = string("reshape_49_cast_fp16")]; + bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; + bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(false)]; + tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = reshape_48_cast_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; + tensor concat_237 = const()[name = string("concat_237"), val = tensor([1, 4, 64, 256])]; + tensor reshape_50_cast_fp16 = reshape(shape = concat_237, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; + tensor var_14132_perm_0 = const()[name = string("op_14132_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14151 = const()[name = string("op_14151"), val = tensor([1, 64, 1024])]; + tensor var_14132_cast_fp16 = transpose(perm = var_14132_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_85")]; + tensor attn_output_165_cast_fp16 = reshape(shape = var_14151, x = var_14132_cast_fp16)[name = string("attn_output_165_cast_fp16")]; + tensor var_14156 = const()[name = string("op_14156"), val = tensor([0, 2, 1])]; + string var_14172_pad_type_0 = const()[name = string("op_14172_pad_type_0"), val = string("valid")]; + int32 var_14172_groups_0 = const()[name = string("op_14172_groups_0"), val = int32(1)]; + tensor var_14172_strides_0 = const()[name = string("op_14172_strides_0"), val = tensor([1])]; + tensor var_14172_pad_0 = const()[name = string("op_14172_pad_0"), val = tensor([0, 0])]; + tensor var_14172_dilations_0 = const()[name = string("op_14172_dilations_0"), val = tensor([1])]; + tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697364096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698248896))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14157_cast_fp16 = transpose(perm = var_14156, x = attn_output_165_cast_fp16)[name = string("transpose_84")]; + tensor var_14172_cast_fp16 = conv(dilations = var_14172_dilations_0, groups = var_14172_groups_0, pad = var_14172_pad_0, pad_type = var_14172_pad_type_0, strides = var_14172_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_14157_cast_fp16)[name = string("op_14172_cast_fp16")]; + tensor var_14176 = const()[name = string("op_14176"), val = tensor([0, 2, 1])]; + int32 var_14187 = const()[name = string("op_14187"), val = int32(-1)]; + fp16 const_704_promoted_to_fp16 = const()[name = string("const_704_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_269_cast_fp16 = transpose(perm = var_14176, x = var_14172_cast_fp16)[name = string("transpose_83")]; + tensor var_14189_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_704_promoted_to_fp16)[name = string("op_14189_cast_fp16")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331_cast_fp16 = concat(axis = var_14187, interleave = input_331_interleave_0, values = (hidden_states_269_cast_fp16, var_14189_cast_fp16))[name = string("input_331_cast_fp16")]; + tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; + fp16 var_14184_to_fp16 = const()[name = string("op_14184_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_14184_to_fp16, x = input_331_cast_fp16)[name = string("normed_397_cast_fp16")]; + tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; + tensor var_14203_to_fp16 = const()[name = string("op_14203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698285824)))]; + tensor attn_output_169_cast_fp16 = mul(x = normed_399_cast_fp16, y = var_14203_to_fp16)[name = string("attn_output_169_cast_fp16")]; + tensor hidden_states_271_cast_fp16 = add(x = hidden_states_261_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_271_cast_fp16")]; + int32 var_14216 = const()[name = string("op_14216"), val = int32(-1)]; + fp16 const_708_promoted_to_fp16 = const()[name = string("const_708_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14218_cast_fp16 = mul(x = hidden_states_271_cast_fp16, y = const_708_promoted_to_fp16)[name = string("op_14218_cast_fp16")]; + bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; + tensor input_333_cast_fp16 = concat(axis = var_14216, interleave = input_333_interleave_0, values = (hidden_states_271_cast_fp16, var_14218_cast_fp16))[name = string("input_333_cast_fp16")]; + tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; + fp16 var_14213_to_fp16 = const()[name = string("op_14213_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_14213_to_fp16, x = input_333_cast_fp16)[name = string("normed_401_cast_fp16")]; + tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; + tensor var_14232_to_fp16 = const()[name = string("op_14232_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698288192)))]; + tensor x_269_cast_fp16 = mul(x = normed_403_cast_fp16, y = var_14232_to_fp16)[name = string("x_269_cast_fp16")]; + tensor var_14244 = const()[name = string("op_14244"), val = tensor([0, 2, 1])]; + tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; + tensor var_14245_cast_fp16 = transpose(perm = var_14244, x = x_269_cast_fp16)[name = string("transpose_82")]; + tensor input_335_cast_fp16 = expand_dims(axes = input_335_axes_0, x = var_14245_cast_fp16)[name = string("input_335_cast_fp16")]; + string x_271_pad_type_0 = const()[name = string("x_271_pad_type_0"), val = string("valid")]; + tensor x_271_strides_0 = const()[name = string("x_271_strides_0"), val = tensor([1, 1])]; + tensor x_271_pad_0 = const()[name = string("x_271_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_271_dilations_0 = const()[name = string("x_271_dilations_0"), val = tensor([1, 1])]; + int32 x_271_groups_0 = const()[name = string("x_271_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698290560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704262592))))[name = string("model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_271_cast_fp16 = conv(dilations = x_271_dilations_0, groups = x_271_groups_0, pad = x_271_pad_0, pad_type = x_271_pad_type_0, strides = x_271_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("x_271_cast_fp16")]; + string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; + tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; + tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; + int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704483840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710455872))))[name = string("model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_33_cast_fp16 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("b_33_cast_fp16")]; + string var_14270_mode_0 = const()[name = string("op_14270_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_14270_cast_fp16 = gelu(mode = var_14270_mode_0, x = x_271_cast_fp16)[name = string("op_14270_cast_fp16")]; + tensor input_337_cast_fp16 = mul(x = var_14270_cast_fp16, y = b_33_cast_fp16)[name = string("input_337_cast_fp16")]; + string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; + tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; + tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; + int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; + tensor model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710677120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716649152))))[name = string("model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_33_cast_fp16 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_337_cast_fp16)[name = string("e_33_cast_fp16")]; + tensor var_14278_axes_0 = const()[name = string("op_14278_axes_0"), val = tensor([2])]; + tensor var_14278_cast_fp16 = squeeze(axes = var_14278_axes_0, x = e_33_cast_fp16)[name = string("op_14278_cast_fp16")]; + tensor var_14279 = const()[name = string("op_14279"), val = tensor([0, 2, 1])]; + int32 var_14290 = const()[name = string("op_14290"), val = int32(-1)]; + fp16 const_712_promoted_to_fp16 = const()[name = string("const_712_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_273_cast_fp16 = transpose(perm = var_14279, x = var_14278_cast_fp16)[name = string("transpose_81")]; + tensor var_14292_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = const_712_promoted_to_fp16)[name = string("op_14292_cast_fp16")]; + bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; + tensor input_339_cast_fp16 = concat(axis = var_14290, interleave = input_339_interleave_0, values = (hidden_states_273_cast_fp16, var_14292_cast_fp16))[name = string("input_339_cast_fp16")]; + tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; + fp16 var_14287_to_fp16 = const()[name = string("op_14287_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_14287_to_fp16, x = input_339_cast_fp16)[name = string("normed_405_cast_fp16")]; + tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_407_cast_fp16 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407_cast_fp16")]; + tensor var_14306_to_fp16 = const()[name = string("op_14306_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716686080)))]; + tensor hidden_states_275_cast_fp16 = mul(x = normed_407_cast_fp16, y = var_14306_to_fp16)[name = string("hidden_states_275_cast_fp16")]; + tensor hidden_states_277_cast_fp16 = add(x = hidden_states_271_cast_fp16, y = hidden_states_275_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; + int32 var_14360 = const()[name = string("op_14360"), val = int32(-1)]; + fp16 const_717_promoted_to_fp16 = const()[name = string("const_717_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14362_cast_fp16 = mul(x = hidden_states_277_cast_fp16, y = const_717_promoted_to_fp16)[name = string("op_14362_cast_fp16")]; + bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; + tensor input_341_cast_fp16 = concat(axis = var_14360, interleave = input_341_interleave_0, values = (hidden_states_277_cast_fp16, var_14362_cast_fp16))[name = string("input_341_cast_fp16")]; + tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; + fp16 var_14357_to_fp16 = const()[name = string("op_14357_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_14357_to_fp16, x = input_341_cast_fp16)[name = string("normed_409_cast_fp16")]; + tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_411_cast_fp16 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411_cast_fp16")]; + tensor var_14376_to_fp16 = const()[name = string("op_14376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716688448)))]; + tensor hidden_states_279_cast_fp16 = mul(x = normed_411_cast_fp16, y = var_14376_to_fp16)[name = string("hidden_states_279_cast_fp16")]; + tensor var_14387 = const()[name = string("op_14387"), val = tensor([0, 2, 1])]; + tensor var_14390_axes_0 = const()[name = string("op_14390_axes_0"), val = tensor([2])]; + tensor var_14388_cast_fp16 = transpose(perm = var_14387, x = hidden_states_279_cast_fp16)[name = string("transpose_80")]; + tensor var_14390_cast_fp16 = expand_dims(axes = var_14390_axes_0, x = var_14388_cast_fp16)[name = string("op_14390_cast_fp16")]; + string query_states_137_pad_type_0 = const()[name = string("query_states_137_pad_type_0"), val = string("valid")]; + tensor query_states_137_strides_0 = const()[name = string("query_states_137_strides_0"), val = tensor([1, 1])]; + tensor query_states_137_pad_0 = const()[name = string("query_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_137_dilations_0 = const()[name = string("query_states_137_dilations_0"), val = tensor([1, 1])]; + int32 query_states_137_groups_0 = const()[name = string("query_states_137_groups_0"), val = int32(1)]; + tensor query_states_137 = conv(dilations = query_states_137_dilations_0, groups = query_states_137_groups_0, pad = query_states_137_pad_0, pad_type = query_states_137_pad_type_0, strides = query_states_137_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_14390_cast_fp16)[name = string("query_states_137")]; + string key_states_171_pad_type_0 = const()[name = string("key_states_171_pad_type_0"), val = string("valid")]; + tensor key_states_171_strides_0 = const()[name = string("key_states_171_strides_0"), val = tensor([1, 1])]; + tensor key_states_171_pad_0 = const()[name = string("key_states_171_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_171_dilations_0 = const()[name = string("key_states_171_dilations_0"), val = tensor([1, 1])]; + int32 key_states_171_groups_0 = const()[name = string("key_states_171_groups_0"), val = int32(1)]; + tensor key_states_171 = conv(dilations = key_states_171_dilations_0, groups = key_states_171_groups_0, pad = key_states_171_pad_0, pad_type = key_states_171_pad_type_0, strides = key_states_171_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_14390_cast_fp16)[name = string("key_states_171")]; + string value_states_137_pad_type_0 = const()[name = string("value_states_137_pad_type_0"), val = string("valid")]; + tensor value_states_137_strides_0 = const()[name = string("value_states_137_strides_0"), val = tensor([1, 1])]; + tensor value_states_137_pad_0 = const()[name = string("value_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_137_dilations_0 = const()[name = string("value_states_137_dilations_0"), val = tensor([1, 1])]; + int32 value_states_137_groups_0 = const()[name = string("value_states_137_groups_0"), val = int32(1)]; + tensor value_states_137 = conv(dilations = value_states_137_dilations_0, groups = value_states_137_groups_0, pad = value_states_137_pad_0, pad_type = value_states_137_pad_type_0, strides = value_states_137_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_14390_cast_fp16)[name = string("value_states_137")]; + tensor var_14432 = const()[name = string("op_14432"), val = tensor([1, 4, 256, 64])]; + tensor var_14433 = reshape(shape = var_14432, x = query_states_137)[name = string("op_14433")]; + tensor var_14438 = const()[name = string("op_14438"), val = tensor([0, 1, 3, 2])]; + tensor var_14443 = const()[name = string("op_14443"), val = tensor([1, 1, 256, 64])]; + tensor var_14444 = reshape(shape = var_14443, x = key_states_171)[name = string("op_14444")]; + tensor var_14449 = const()[name = string("op_14449"), val = tensor([0, 1, 3, 2])]; + tensor var_14454 = const()[name = string("op_14454"), val = tensor([1, 1, 256, 64])]; + tensor var_14455 = reshape(shape = var_14454, x = value_states_137)[name = string("op_14455")]; + tensor var_14460 = const()[name = string("op_14460"), val = tensor([0, 1, 3, 2])]; + int32 var_14471 = const()[name = string("op_14471"), val = int32(-1)]; + fp16 const_722_promoted = const()[name = string("const_722_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_281 = transpose(perm = var_14438, x = var_14433)[name = string("transpose_79")]; + tensor var_14473 = mul(x = hidden_states_281, y = const_722_promoted)[name = string("op_14473")]; + bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; + tensor input_345 = concat(axis = var_14471, interleave = input_345_interleave_0, values = (hidden_states_281, var_14473))[name = string("input_345")]; + tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; + fp16 var_14468_to_fp16 = const()[name = string("op_14468_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_14468_to_fp16, x = input_345)[name = string("normed_413_cast_fp16")]; + tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_415 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415")]; + tensor var_14487_to_fp16 = const()[name = string("op_14487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716690816)))]; + tensor q_35_cast_fp16 = mul(x = normed_415, y = var_14487_to_fp16)[name = string("q_35_cast_fp16")]; + int32 var_14498 = const()[name = string("op_14498"), val = int32(-1)]; + fp16 const_726_promoted = const()[name = string("const_726_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_283 = transpose(perm = var_14449, x = var_14444)[name = string("transpose_78")]; + tensor var_14500 = mul(x = hidden_states_283, y = const_726_promoted)[name = string("op_14500")]; + bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; + tensor input_347 = concat(axis = var_14498, interleave = input_347_interleave_0, values = (hidden_states_283, var_14500))[name = string("input_347")]; + tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; + fp16 var_14495_to_fp16 = const()[name = string("op_14495_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_14495_to_fp16, x = input_347)[name = string("normed_417_cast_fp16")]; + tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_419 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419")]; + tensor var_14514_to_fp16 = const()[name = string("op_14514_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716691392)))]; + tensor k_35_cast_fp16 = mul(x = normed_419, y = var_14514_to_fp16)[name = string("k_35_cast_fp16")]; + tensor var_14528_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_35)[name = string("op_14528_cast_fp16")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; + fp16 const_732_promoted_to_fp16 = const()[name = string("const_732_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14549_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_732_promoted_to_fp16)[name = string("op_14549_cast_fp16")]; + int32 var_14551 = const()[name = string("op_14551"), val = int32(-1)]; + bool var_14552_interleave_0 = const()[name = string("op_14552_interleave_0"), val = bool(false)]; + tensor var_14552_cast_fp16 = concat(axis = var_14551, interleave = var_14552_interleave_0, values = (var_14549_cast_fp16, x1_69_cast_fp16))[name = string("op_14552_cast_fp16")]; + tensor var_14553_cast_fp16 = mul(x = var_14552_cast_fp16, y = sin_35)[name = string("op_14553_cast_fp16")]; + tensor query_states_139_cast_fp16 = add(x = var_14528_cast_fp16, y = var_14553_cast_fp16)[name = string("query_states_139_cast_fp16")]; + tensor var_14556_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_35)[name = string("op_14556_cast_fp16")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; + fp16 const_735_promoted_to_fp16 = const()[name = string("const_735_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14577_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_735_promoted_to_fp16)[name = string("op_14577_cast_fp16")]; + int32 var_14579 = const()[name = string("op_14579"), val = int32(-1)]; + bool var_14580_interleave_0 = const()[name = string("op_14580_interleave_0"), val = bool(false)]; + tensor var_14580_cast_fp16 = concat(axis = var_14579, interleave = var_14580_interleave_0, values = (var_14577_cast_fp16, x1_71_cast_fp16))[name = string("op_14580_cast_fp16")]; + tensor var_14581_cast_fp16 = mul(x = var_14580_cast_fp16, y = sin_35)[name = string("op_14581_cast_fp16")]; + tensor key_states_173_cast_fp16 = add(x = var_14556_cast_fp16, y = var_14581_cast_fp16)[name = string("key_states_173_cast_fp16")]; + tensor concat_238 = const()[name = string("concat_238"), val = tensor([2, 0, 0, 0])]; + tensor concat_239 = const()[name = string("concat_239"), val = tensor([3, 0, 64, 0])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_238, begin_mask = model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0, end = concat_239, end_mask = model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_5_stride_0, update = key_states_173_cast_fp16, x = coreml_update_state_75)[name = string("model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_190_write_state")]; + tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_190")]; + tensor concat_240 = const()[name = string("concat_240"), val = tensor([6, 0, 0, 0])]; + tensor concat_241 = const()[name = string("concat_241"), val = tensor([7, 0, 64, 0])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_139 = transpose(perm = var_14460, x = var_14455)[name = string("transpose_77")]; + tensor model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_240, begin_mask = model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0, end = concat_241, end_mask = model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_6_stride_0, update = value_states_139, x = coreml_update_state_86)[name = string("model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_191_write_state")]; + tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_191")]; + tensor var_14680_begin_0 = const()[name = string("op_14680_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_14680_end_0 = const()[name = string("op_14680_end_0"), val = tensor([3, 1, 4096, 256])]; + tensor var_14680_end_mask_0 = const()[name = string("op_14680_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14680_cast_fp16 = slice_by_index(begin = var_14680_begin_0, end = var_14680_end_0, end_mask = var_14680_end_mask_0, x = coreml_update_state_87)[name = string("op_14680_cast_fp16")]; + tensor var_14687_begin_0 = const()[name = string("op_14687_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_14687_end_0 = const()[name = string("op_14687_end_0"), val = tensor([7, 1, 4096, 256])]; + tensor var_14687_end_mask_0 = const()[name = string("op_14687_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14687_cast_fp16 = slice_by_index(begin = var_14687_begin_0, end = var_14687_end_0, end_mask = var_14687_end_mask_0, x = coreml_update_state_87)[name = string("op_14687_cast_fp16")]; + tensor var_14726 = const()[name = string("op_14726"), val = tensor([1, 4, 1, 1])]; + tensor x_277_cast_fp16 = tile(reps = var_14726, x = var_14680_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_14746 = const()[name = string("op_14746"), val = tensor([1, 4, 1, 1])]; + tensor x_283_cast_fp16 = tile(reps = var_14746, x = var_14687_cast_fp16)[name = string("x_283_cast_fp16")]; + bool var_14773_transpose_x_0 = const()[name = string("op_14773_transpose_x_0"), val = bool(false)]; + bool var_14773_transpose_y_0 = const()[name = string("op_14773_transpose_y_0"), val = bool(true)]; + tensor var_14773 = matmul(transpose_x = var_14773_transpose_x_0, transpose_y = var_14773_transpose_y_0, x = query_states_139_cast_fp16, y = x_277_cast_fp16)[name = string("op_14773")]; + fp16 var_14774_to_fp16 = const()[name = string("op_14774_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_69_cast_fp16 = mul(x = var_14773, y = var_14774_to_fp16)[name = string("attn_weights_69_cast_fp16")]; + tensor attn_weights_71_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask)[name = string("attn_weights_71_cast_fp16")]; + int32 var_14809 = const()[name = string("op_14809"), val = int32(-1)]; + tensor var_14811_cast_fp16 = softmax(axis = var_14809, x = attn_weights_71_cast_fp16)[name = string("op_14811_cast_fp16")]; + tensor concat_246 = const()[name = string("concat_246"), val = tensor([4, 64, 4096])]; + tensor reshape_51_cast_fp16 = reshape(shape = concat_246, x = var_14811_cast_fp16)[name = string("reshape_51_cast_fp16")]; + tensor concat_247 = const()[name = string("concat_247"), val = tensor([4, 4096, 256])]; + tensor reshape_52_cast_fp16 = reshape(shape = concat_247, x = x_283_cast_fp16)[name = string("reshape_52_cast_fp16")]; + bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; + bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(false)]; + tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = reshape_51_cast_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; + tensor concat_251 = const()[name = string("concat_251"), val = tensor([1, 4, 64, 256])]; + tensor reshape_53_cast_fp16 = reshape(shape = concat_251, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; + tensor var_14823_perm_0 = const()[name = string("op_14823_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14842 = const()[name = string("op_14842"), val = tensor([1, 64, 1024])]; + tensor var_14823_cast_fp16 = transpose(perm = var_14823_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_76")]; + tensor attn_output_175_cast_fp16 = reshape(shape = var_14842, x = var_14823_cast_fp16)[name = string("attn_output_175_cast_fp16")]; + tensor var_14847 = const()[name = string("op_14847"), val = tensor([0, 2, 1])]; + string var_14863_pad_type_0 = const()[name = string("op_14863_pad_type_0"), val = string("valid")]; + int32 var_14863_groups_0 = const()[name = string("op_14863_groups_0"), val = int32(1)]; + tensor var_14863_strides_0 = const()[name = string("op_14863_strides_0"), val = tensor([1])]; + tensor var_14863_pad_0 = const()[name = string("op_14863_pad_0"), val = tensor([0, 0])]; + tensor var_14863_dilations_0 = const()[name = string("op_14863_dilations_0"), val = tensor([1])]; + tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716691968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717576768))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14848_cast_fp16 = transpose(perm = var_14847, x = attn_output_175_cast_fp16)[name = string("transpose_75")]; + tensor var_14863_cast_fp16 = conv(dilations = var_14863_dilations_0, groups = var_14863_groups_0, pad = var_14863_pad_0, pad_type = var_14863_pad_type_0, strides = var_14863_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_14848_cast_fp16)[name = string("op_14863_cast_fp16")]; + tensor var_14867 = const()[name = string("op_14867"), val = tensor([0, 2, 1])]; + int32 var_14878 = const()[name = string("op_14878"), val = int32(-1)]; + fp16 const_747_promoted_to_fp16 = const()[name = string("const_747_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_285_cast_fp16 = transpose(perm = var_14867, x = var_14863_cast_fp16)[name = string("transpose_74")]; + tensor var_14880_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = const_747_promoted_to_fp16)[name = string("op_14880_cast_fp16")]; + bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; + tensor input_351_cast_fp16 = concat(axis = var_14878, interleave = input_351_interleave_0, values = (hidden_states_285_cast_fp16, var_14880_cast_fp16))[name = string("input_351_cast_fp16")]; + tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; + fp16 var_14875_to_fp16 = const()[name = string("op_14875_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_14875_to_fp16, x = input_351_cast_fp16)[name = string("normed_421_cast_fp16")]; + tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_423_cast_fp16 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423_cast_fp16")]; + tensor var_14894_to_fp16 = const()[name = string("op_14894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717613696)))]; + tensor attn_output_179_cast_fp16 = mul(x = normed_423_cast_fp16, y = var_14894_to_fp16)[name = string("attn_output_179_cast_fp16")]; + tensor hidden_states_287_cast_fp16 = add(x = hidden_states_277_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_287_cast_fp16")]; + int32 var_14907 = const()[name = string("op_14907"), val = int32(-1)]; + fp16 const_751_promoted_to_fp16 = const()[name = string("const_751_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14909_cast_fp16 = mul(x = hidden_states_287_cast_fp16, y = const_751_promoted_to_fp16)[name = string("op_14909_cast_fp16")]; + bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; + tensor input_353_cast_fp16 = concat(axis = var_14907, interleave = input_353_interleave_0, values = (hidden_states_287_cast_fp16, var_14909_cast_fp16))[name = string("input_353_cast_fp16")]; + tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; + fp16 var_14904_to_fp16 = const()[name = string("op_14904_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_14904_to_fp16, x = input_353_cast_fp16)[name = string("normed_425_cast_fp16")]; + tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_427_cast_fp16 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427_cast_fp16")]; + tensor var_14923_to_fp16 = const()[name = string("op_14923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717616064)))]; + tensor x_285_cast_fp16 = mul(x = normed_427_cast_fp16, y = var_14923_to_fp16)[name = string("x_285_cast_fp16")]; + tensor var_14935 = const()[name = string("op_14935"), val = tensor([0, 2, 1])]; + tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; + tensor var_14936_cast_fp16 = transpose(perm = var_14935, x = x_285_cast_fp16)[name = string("transpose_73")]; + tensor input_355_cast_fp16 = expand_dims(axes = input_355_axes_0, x = var_14936_cast_fp16)[name = string("input_355_cast_fp16")]; + string x_287_pad_type_0 = const()[name = string("x_287_pad_type_0"), val = string("valid")]; + tensor x_287_strides_0 = const()[name = string("x_287_strides_0"), val = tensor([1, 1])]; + tensor x_287_pad_0 = const()[name = string("x_287_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_287_dilations_0 = const()[name = string("x_287_dilations_0"), val = tensor([1, 1])]; + int32 x_287_groups_0 = const()[name = string("x_287_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717618432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723590464))))[name = string("model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_287_cast_fp16 = conv(dilations = x_287_dilations_0, groups = x_287_groups_0, pad = x_287_pad_0, pad_type = x_287_pad_type_0, strides = x_287_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("x_287_cast_fp16")]; + string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; + tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; + tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; + int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723811712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729783744))))[name = string("model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_35_cast_fp16 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("b_35_cast_fp16")]; + string var_14961_mode_0 = const()[name = string("op_14961_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_14961_cast_fp16 = gelu(mode = var_14961_mode_0, x = x_287_cast_fp16)[name = string("op_14961_cast_fp16")]; + tensor input_357_cast_fp16 = mul(x = var_14961_cast_fp16, y = b_35_cast_fp16)[name = string("input_357_cast_fp16")]; + string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; + tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; + tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; + int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; + tensor model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730004992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735977024))))[name = string("model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_35_cast_fp16 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_357_cast_fp16)[name = string("e_35_cast_fp16")]; + tensor var_14969_axes_0 = const()[name = string("op_14969_axes_0"), val = tensor([2])]; + tensor var_14969_cast_fp16 = squeeze(axes = var_14969_axes_0, x = e_35_cast_fp16)[name = string("op_14969_cast_fp16")]; + tensor var_14970 = const()[name = string("op_14970"), val = tensor([0, 2, 1])]; + int32 var_14981 = const()[name = string("op_14981"), val = int32(-1)]; + fp16 const_755_promoted_to_fp16 = const()[name = string("const_755_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_289_cast_fp16 = transpose(perm = var_14970, x = var_14969_cast_fp16)[name = string("transpose_72")]; + tensor var_14983_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = const_755_promoted_to_fp16)[name = string("op_14983_cast_fp16")]; + bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; + tensor input_359_cast_fp16 = concat(axis = var_14981, interleave = input_359_interleave_0, values = (hidden_states_289_cast_fp16, var_14983_cast_fp16))[name = string("input_359_cast_fp16")]; + tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; + fp16 var_14978_to_fp16 = const()[name = string("op_14978_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_14978_to_fp16, x = input_359_cast_fp16)[name = string("normed_429_cast_fp16")]; + tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; + tensor var_14997_to_fp16 = const()[name = string("op_14997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736013952)))]; + tensor hidden_states_291_cast_fp16 = mul(x = normed_431_cast_fp16, y = var_14997_to_fp16)[name = string("hidden_states_291_cast_fp16")]; + tensor hidden_states_293_cast_fp16 = add(x = hidden_states_287_cast_fp16, y = hidden_states_291_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + int32 var_15051 = const()[name = string("op_15051"), val = int32(-1)]; + fp16 const_760_promoted_to_fp16 = const()[name = string("const_760_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15053_cast_fp16 = mul(x = hidden_states_293_cast_fp16, y = const_760_promoted_to_fp16)[name = string("op_15053_cast_fp16")]; + bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; + tensor input_361_cast_fp16 = concat(axis = var_15051, interleave = input_361_interleave_0, values = (hidden_states_293_cast_fp16, var_15053_cast_fp16))[name = string("input_361_cast_fp16")]; + tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; + fp16 var_15048_to_fp16 = const()[name = string("op_15048_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_15048_to_fp16, x = input_361_cast_fp16)[name = string("normed_433_cast_fp16")]; + tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; + tensor var_15067_to_fp16 = const()[name = string("op_15067_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736016320)))]; + tensor hidden_states_295_cast_fp16 = mul(x = normed_435_cast_fp16, y = var_15067_to_fp16)[name = string("hidden_states_295_cast_fp16")]; + tensor var_15078 = const()[name = string("op_15078"), val = tensor([0, 2, 1])]; + tensor var_15081_axes_0 = const()[name = string("op_15081_axes_0"), val = tensor([2])]; + tensor var_15079_cast_fp16 = transpose(perm = var_15078, x = hidden_states_295_cast_fp16)[name = string("transpose_71")]; + tensor var_15081_cast_fp16 = expand_dims(axes = var_15081_axes_0, x = var_15079_cast_fp16)[name = string("op_15081_cast_fp16")]; + string query_states_145_pad_type_0 = const()[name = string("query_states_145_pad_type_0"), val = string("valid")]; + tensor query_states_145_strides_0 = const()[name = string("query_states_145_strides_0"), val = tensor([1, 1])]; + tensor query_states_145_pad_0 = const()[name = string("query_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_145_dilations_0 = const()[name = string("query_states_145_dilations_0"), val = tensor([1, 1])]; + int32 query_states_145_groups_0 = const()[name = string("query_states_145_groups_0"), val = int32(1)]; + tensor query_states_145 = conv(dilations = query_states_145_dilations_0, groups = query_states_145_groups_0, pad = query_states_145_pad_0, pad_type = query_states_145_pad_type_0, strides = query_states_145_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_15081_cast_fp16)[name = string("query_states_145")]; + string key_states_181_pad_type_0 = const()[name = string("key_states_181_pad_type_0"), val = string("valid")]; + tensor key_states_181_strides_0 = const()[name = string("key_states_181_strides_0"), val = tensor([1, 1])]; + tensor key_states_181_pad_0 = const()[name = string("key_states_181_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_181_dilations_0 = const()[name = string("key_states_181_dilations_0"), val = tensor([1, 1])]; + int32 key_states_181_groups_0 = const()[name = string("key_states_181_groups_0"), val = int32(1)]; + tensor key_states_181 = conv(dilations = key_states_181_dilations_0, groups = key_states_181_groups_0, pad = key_states_181_pad_0, pad_type = key_states_181_pad_type_0, strides = key_states_181_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_15081_cast_fp16)[name = string("key_states_181")]; + string value_states_145_pad_type_0 = const()[name = string("value_states_145_pad_type_0"), val = string("valid")]; + tensor value_states_145_strides_0 = const()[name = string("value_states_145_strides_0"), val = tensor([1, 1])]; + tensor value_states_145_pad_0 = const()[name = string("value_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_145_dilations_0 = const()[name = string("value_states_145_dilations_0"), val = tensor([1, 1])]; + int32 value_states_145_groups_0 = const()[name = string("value_states_145_groups_0"), val = int32(1)]; + tensor value_states_145 = conv(dilations = value_states_145_dilations_0, groups = value_states_145_groups_0, pad = value_states_145_pad_0, pad_type = value_states_145_pad_type_0, strides = value_states_145_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_15081_cast_fp16)[name = string("value_states_145")]; + tensor var_15123 = const()[name = string("op_15123"), val = tensor([1, 4, 256, 64])]; + tensor var_15124 = reshape(shape = var_15123, x = query_states_145)[name = string("op_15124")]; + tensor var_15129 = const()[name = string("op_15129"), val = tensor([0, 1, 3, 2])]; + tensor var_15134 = const()[name = string("op_15134"), val = tensor([1, 1, 256, 64])]; + tensor var_15135 = reshape(shape = var_15134, x = key_states_181)[name = string("op_15135")]; + tensor var_15140 = const()[name = string("op_15140"), val = tensor([0, 1, 3, 2])]; + tensor var_15145 = const()[name = string("op_15145"), val = tensor([1, 1, 256, 64])]; + tensor var_15146 = reshape(shape = var_15145, x = value_states_145)[name = string("op_15146")]; + tensor var_15151 = const()[name = string("op_15151"), val = tensor([0, 1, 3, 2])]; + int32 var_15162 = const()[name = string("op_15162"), val = int32(-1)]; + fp16 const_765_promoted = const()[name = string("const_765_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_297 = transpose(perm = var_15129, x = var_15124)[name = string("transpose_70")]; + tensor var_15164 = mul(x = hidden_states_297, y = const_765_promoted)[name = string("op_15164")]; + bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; + tensor input_365 = concat(axis = var_15162, interleave = input_365_interleave_0, values = (hidden_states_297, var_15164))[name = string("input_365")]; + tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; + fp16 var_15159_to_fp16 = const()[name = string("op_15159_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_15159_to_fp16, x = input_365)[name = string("normed_437_cast_fp16")]; + tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; + tensor var_15178_to_fp16 = const()[name = string("op_15178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736018688)))]; + tensor q_37_cast_fp16 = mul(x = normed_439, y = var_15178_to_fp16)[name = string("q_37_cast_fp16")]; + int32 var_15189 = const()[name = string("op_15189"), val = int32(-1)]; + fp16 const_769_promoted = const()[name = string("const_769_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_299 = transpose(perm = var_15140, x = var_15135)[name = string("transpose_69")]; + tensor var_15191 = mul(x = hidden_states_299, y = const_769_promoted)[name = string("op_15191")]; + bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; + tensor input_367 = concat(axis = var_15189, interleave = input_367_interleave_0, values = (hidden_states_299, var_15191))[name = string("input_367")]; + tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; + fp16 var_15186_to_fp16 = const()[name = string("op_15186_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_15186_to_fp16, x = input_367)[name = string("normed_441_cast_fp16")]; + tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; + tensor var_15205_to_fp16 = const()[name = string("op_15205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736019264)))]; + tensor k_37_cast_fp16 = mul(x = normed_443, y = var_15205_to_fp16)[name = string("k_37_cast_fp16")]; + tensor var_15219_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_5)[name = string("op_15219_cast_fp16")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; + fp16 const_775_promoted_to_fp16 = const()[name = string("const_775_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15240_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_775_promoted_to_fp16)[name = string("op_15240_cast_fp16")]; + int32 var_15242 = const()[name = string("op_15242"), val = int32(-1)]; + bool var_15243_interleave_0 = const()[name = string("op_15243_interleave_0"), val = bool(false)]; + tensor var_15243_cast_fp16 = concat(axis = var_15242, interleave = var_15243_interleave_0, values = (var_15240_cast_fp16, x1_73_cast_fp16))[name = string("op_15243_cast_fp16")]; + tensor var_15244_cast_fp16 = mul(x = var_15243_cast_fp16, y = sin_5)[name = string("op_15244_cast_fp16")]; + tensor query_states_147_cast_fp16 = add(x = var_15219_cast_fp16, y = var_15244_cast_fp16)[name = string("query_states_147_cast_fp16")]; + tensor var_15247_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_5)[name = string("op_15247_cast_fp16")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; + fp16 const_778_promoted_to_fp16 = const()[name = string("const_778_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15268_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_778_promoted_to_fp16)[name = string("op_15268_cast_fp16")]; + int32 var_15270 = const()[name = string("op_15270"), val = int32(-1)]; + bool var_15271_interleave_0 = const()[name = string("op_15271_interleave_0"), val = bool(false)]; + tensor var_15271_cast_fp16 = concat(axis = var_15270, interleave = var_15271_interleave_0, values = (var_15268_cast_fp16, x1_75_cast_fp16))[name = string("op_15271_cast_fp16")]; + tensor var_15272_cast_fp16 = mul(x = var_15271_cast_fp16, y = sin_5)[name = string("op_15272_cast_fp16")]; + tensor key_states_183_cast_fp16 = add(x = var_15247_cast_fp16, y = var_15272_cast_fp16)[name = string("key_states_183_cast_fp16")]; + tensor key_slice_31_begin_0 = const()[name = string("key_slice_31_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor key_slice_31_end_0 = const()[name = string("key_slice_31_end_0"), val = tensor([16, 1, 512, 256])]; + tensor key_slice_31_end_mask_0 = const()[name = string("key_slice_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_31_cast_fp16 = slice_by_index(begin = key_slice_31_begin_0, end = key_slice_31_end_0, end_mask = key_slice_31_end_mask_0, x = coreml_update_state_85)[name = string("key_slice_31_cast_fp16")]; + tensor var_15309_begin_0 = const()[name = string("op_15309_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_15309_end_0 = const()[name = string("op_15309_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_15309_end_mask_0 = const()[name = string("op_15309_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_15309_cast_fp16 = slice_by_index(begin = var_15309_begin_0, end = var_15309_end_0, end_mask = var_15309_end_mask_0, x = key_slice_31_cast_fp16)[name = string("op_15309_cast_fp16")]; + int32 var_15336 = const()[name = string("op_15336"), val = int32(2)]; + bool shifted_key_31_interleave_0 = const()[name = string("shifted_key_31_interleave_0"), val = bool(false)]; + tensor shifted_key_31_cast_fp16 = concat(axis = var_15336, interleave = shifted_key_31_interleave_0, values = (var_15309_cast_fp16, key_states_183_cast_fp16))[name = string("shifted_key_31_cast_fp16")]; + tensor concat_252 = const()[name = string("concat_252"), val = tensor([15, 0, 0, 0])]; + tensor concat_253 = const()[name = string("concat_253"), val = tensor([16, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_252, begin_mask = model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0, end = concat_253, end_mask = model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_31_stride_0, update = shifted_key_31_cast_fp16, x = coreml_update_state_85)[name = string("model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_192_write_state")]; + tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_192")]; + tensor value_slice_31_begin_0 = const()[name = string("value_slice_31_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor value_slice_31_end_0 = const()[name = string("value_slice_31_end_0"), val = tensor([38, 1, 512, 256])]; + tensor value_slice_31_end_mask_0 = const()[name = string("value_slice_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_31_cast_fp16 = slice_by_index(begin = value_slice_31_begin_0, end = value_slice_31_end_0, end_mask = value_slice_31_end_mask_0, x = coreml_update_state_88)[name = string("value_slice_31_cast_fp16")]; + tensor var_15379_begin_0 = const()[name = string("op_15379_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_15379_end_0 = const()[name = string("op_15379_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_15379_end_mask_0 = const()[name = string("op_15379_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_15379_cast_fp16 = slice_by_index(begin = var_15379_begin_0, end = var_15379_end_0, end_mask = var_15379_end_mask_0, x = value_slice_31_cast_fp16)[name = string("op_15379_cast_fp16")]; + int32 var_15406 = const()[name = string("op_15406"), val = int32(2)]; + bool shifted_value_31_interleave_0 = const()[name = string("shifted_value_31_interleave_0"), val = bool(false)]; + tensor value_states_147 = transpose(perm = var_15151, x = var_15146)[name = string("transpose_68")]; + tensor shifted_value_31_cast_fp16 = concat(axis = var_15406, interleave = shifted_value_31_interleave_0, values = (var_15379_cast_fp16, value_states_147))[name = string("shifted_value_31_cast_fp16")]; + tensor concat_254 = const()[name = string("concat_254"), val = tensor([37, 0, 0, 0])]; + tensor concat_255 = const()[name = string("concat_255"), val = tensor([38, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_254, begin_mask = model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0, end = concat_255, end_mask = model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_32_stride_0, update = shifted_value_31_cast_fp16, x = coreml_update_state_88)[name = string("model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_193_write_state")]; + tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_193")]; + tensor var_15434_begin_0 = const()[name = string("op_15434_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor var_15434_end_0 = const()[name = string("op_15434_end_0"), val = tensor([16, 1, 512, 256])]; + tensor var_15434_end_mask_0 = const()[name = string("op_15434_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15434_cast_fp16 = slice_by_index(begin = var_15434_begin_0, end = var_15434_end_0, end_mask = var_15434_end_mask_0, x = coreml_update_state_89)[name = string("op_15434_cast_fp16")]; + tensor var_15441_begin_0 = const()[name = string("op_15441_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor var_15441_end_0 = const()[name = string("op_15441_end_0"), val = tensor([38, 1, 512, 256])]; + tensor var_15441_end_mask_0 = const()[name = string("op_15441_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15441_cast_fp16 = slice_by_index(begin = var_15441_begin_0, end = var_15441_end_0, end_mask = var_15441_end_mask_0, x = coreml_update_state_89)[name = string("op_15441_cast_fp16")]; + tensor var_15480 = const()[name = string("op_15480"), val = tensor([1, 4, 1, 1])]; + tensor x_293_cast_fp16 = tile(reps = var_15480, x = var_15434_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_15500 = const()[name = string("op_15500"), val = tensor([1, 4, 1, 1])]; + tensor x_299_cast_fp16 = tile(reps = var_15500, x = var_15441_cast_fp16)[name = string("x_299_cast_fp16")]; + bool var_15527_transpose_x_0 = const()[name = string("op_15527_transpose_x_0"), val = bool(false)]; + bool var_15527_transpose_y_0 = const()[name = string("op_15527_transpose_y_0"), val = bool(true)]; + tensor var_15527 = matmul(transpose_x = var_15527_transpose_x_0, transpose_y = var_15527_transpose_y_0, x = query_states_147_cast_fp16, y = x_293_cast_fp16)[name = string("op_15527")]; + fp16 var_15528_to_fp16 = const()[name = string("op_15528_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_73_cast_fp16 = mul(x = var_15527, y = var_15528_to_fp16)[name = string("attn_weights_73_cast_fp16")]; + tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = mask_slice_1)[name = string("attn_weights_75_cast_fp16")]; + int32 var_15563 = const()[name = string("op_15563"), val = int32(-1)]; + tensor var_15565_cast_fp16 = softmax(axis = var_15563, x = attn_weights_75_cast_fp16)[name = string("op_15565_cast_fp16")]; + tensor concat_260 = const()[name = string("concat_260"), val = tensor([4, 64, 512])]; + tensor reshape_54_cast_fp16 = reshape(shape = concat_260, x = var_15565_cast_fp16)[name = string("reshape_54_cast_fp16")]; + tensor concat_261 = const()[name = string("concat_261"), val = tensor([4, 512, 256])]; + tensor reshape_55_cast_fp16 = reshape(shape = concat_261, x = x_299_cast_fp16)[name = string("reshape_55_cast_fp16")]; + bool matmul_18_transpose_x_0 = const()[name = string("matmul_18_transpose_x_0"), val = bool(false)]; + bool matmul_18_transpose_y_0 = const()[name = string("matmul_18_transpose_y_0"), val = bool(false)]; + tensor matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_0, transpose_y = matmul_18_transpose_y_0, x = reshape_54_cast_fp16, y = reshape_55_cast_fp16)[name = string("matmul_18_cast_fp16")]; + tensor concat_265 = const()[name = string("concat_265"), val = tensor([1, 4, 64, 256])]; + tensor reshape_56_cast_fp16 = reshape(shape = concat_265, x = matmul_18_cast_fp16)[name = string("reshape_56_cast_fp16")]; + tensor var_15577_perm_0 = const()[name = string("op_15577_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_15596 = const()[name = string("op_15596"), val = tensor([1, 64, 1024])]; + tensor var_15577_cast_fp16 = transpose(perm = var_15577_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_67")]; + tensor attn_output_185_cast_fp16 = reshape(shape = var_15596, x = var_15577_cast_fp16)[name = string("attn_output_185_cast_fp16")]; + tensor var_15601 = const()[name = string("op_15601"), val = tensor([0, 2, 1])]; + string var_15617_pad_type_0 = const()[name = string("op_15617_pad_type_0"), val = string("valid")]; + int32 var_15617_groups_0 = const()[name = string("op_15617_groups_0"), val = int32(1)]; + tensor var_15617_strides_0 = const()[name = string("op_15617_strides_0"), val = tensor([1])]; + tensor var_15617_pad_0 = const()[name = string("op_15617_pad_0"), val = tensor([0, 0])]; + tensor var_15617_dilations_0 = const()[name = string("op_15617_dilations_0"), val = tensor([1])]; + tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736019840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736904640))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_15602_cast_fp16 = transpose(perm = var_15601, x = attn_output_185_cast_fp16)[name = string("transpose_66")]; + tensor var_15617_cast_fp16 = conv(dilations = var_15617_dilations_0, groups = var_15617_groups_0, pad = var_15617_pad_0, pad_type = var_15617_pad_type_0, strides = var_15617_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_15602_cast_fp16)[name = string("op_15617_cast_fp16")]; + tensor var_15621 = const()[name = string("op_15621"), val = tensor([0, 2, 1])]; + int32 var_15632 = const()[name = string("op_15632"), val = int32(-1)]; + fp16 const_789_promoted_to_fp16 = const()[name = string("const_789_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_301_cast_fp16 = transpose(perm = var_15621, x = var_15617_cast_fp16)[name = string("transpose_65")]; + tensor var_15634_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = const_789_promoted_to_fp16)[name = string("op_15634_cast_fp16")]; + bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; + tensor input_371_cast_fp16 = concat(axis = var_15632, interleave = input_371_interleave_0, values = (hidden_states_301_cast_fp16, var_15634_cast_fp16))[name = string("input_371_cast_fp16")]; + tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; + fp16 var_15629_to_fp16 = const()[name = string("op_15629_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_15629_to_fp16, x = input_371_cast_fp16)[name = string("normed_445_cast_fp16")]; + tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; + tensor var_15648_to_fp16 = const()[name = string("op_15648_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736941568)))]; + tensor attn_output_189_cast_fp16 = mul(x = normed_447_cast_fp16, y = var_15648_to_fp16)[name = string("attn_output_189_cast_fp16")]; + tensor hidden_states_303_cast_fp16 = add(x = hidden_states_293_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; + int32 var_15661 = const()[name = string("op_15661"), val = int32(-1)]; + fp16 const_793_promoted_to_fp16 = const()[name = string("const_793_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15663_cast_fp16 = mul(x = hidden_states_303_cast_fp16, y = const_793_promoted_to_fp16)[name = string("op_15663_cast_fp16")]; + bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; + tensor input_373_cast_fp16 = concat(axis = var_15661, interleave = input_373_interleave_0, values = (hidden_states_303_cast_fp16, var_15663_cast_fp16))[name = string("input_373_cast_fp16")]; + tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; + fp16 var_15658_to_fp16 = const()[name = string("op_15658_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_15658_to_fp16, x = input_373_cast_fp16)[name = string("normed_449_cast_fp16")]; + tensor normed_451_begin_0 = const()[name = string("normed_451_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_451_end_0 = const()[name = string("normed_451_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_451_end_mask_0 = const()[name = string("normed_451_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_451_cast_fp16 = slice_by_index(begin = normed_451_begin_0, end = normed_451_end_0, end_mask = normed_451_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_451_cast_fp16")]; + tensor var_15677_to_fp16 = const()[name = string("op_15677_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736943936)))]; + tensor x_301_cast_fp16 = mul(x = normed_451_cast_fp16, y = var_15677_to_fp16)[name = string("x_301_cast_fp16")]; + tensor var_15689 = const()[name = string("op_15689"), val = tensor([0, 2, 1])]; + tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; + tensor var_15690_cast_fp16 = transpose(perm = var_15689, x = x_301_cast_fp16)[name = string("transpose_64")]; + tensor input_375_cast_fp16 = expand_dims(axes = input_375_axes_0, x = var_15690_cast_fp16)[name = string("input_375_cast_fp16")]; + string x_303_pad_type_0 = const()[name = string("x_303_pad_type_0"), val = string("valid")]; + tensor x_303_strides_0 = const()[name = string("x_303_strides_0"), val = tensor([1, 1])]; + tensor x_303_pad_0 = const()[name = string("x_303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_303_dilations_0 = const()[name = string("x_303_dilations_0"), val = tensor([1, 1])]; + int32 x_303_groups_0 = const()[name = string("x_303_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736946304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742918336))))[name = string("model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_303_cast_fp16 = conv(dilations = x_303_dilations_0, groups = x_303_groups_0, pad = x_303_pad_0, pad_type = x_303_pad_type_0, strides = x_303_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("x_303_cast_fp16")]; + string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; + tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; + tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; + int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743139584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749111616))))[name = string("model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_37_cast_fp16 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("b_37_cast_fp16")]; + string var_15715_mode_0 = const()[name = string("op_15715_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_15715_cast_fp16 = gelu(mode = var_15715_mode_0, x = x_303_cast_fp16)[name = string("op_15715_cast_fp16")]; + tensor input_377_cast_fp16 = mul(x = var_15715_cast_fp16, y = b_37_cast_fp16)[name = string("input_377_cast_fp16")]; + string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; + tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; + tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; + int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; + tensor model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749332864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755304896))))[name = string("model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_37_cast_fp16 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_377_cast_fp16)[name = string("e_37_cast_fp16")]; + tensor var_15723_axes_0 = const()[name = string("op_15723_axes_0"), val = tensor([2])]; + tensor var_15723_cast_fp16 = squeeze(axes = var_15723_axes_0, x = e_37_cast_fp16)[name = string("op_15723_cast_fp16")]; + tensor var_15724 = const()[name = string("op_15724"), val = tensor([0, 2, 1])]; + int32 var_15735 = const()[name = string("op_15735"), val = int32(-1)]; + fp16 const_797_promoted_to_fp16 = const()[name = string("const_797_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_305_cast_fp16 = transpose(perm = var_15724, x = var_15723_cast_fp16)[name = string("transpose_63")]; + tensor var_15737_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = const_797_promoted_to_fp16)[name = string("op_15737_cast_fp16")]; + bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; + tensor input_379_cast_fp16 = concat(axis = var_15735, interleave = input_379_interleave_0, values = (hidden_states_305_cast_fp16, var_15737_cast_fp16))[name = string("input_379_cast_fp16")]; + tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; + fp16 var_15732_to_fp16 = const()[name = string("op_15732_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_15732_to_fp16, x = input_379_cast_fp16)[name = string("normed_453_cast_fp16")]; + tensor normed_455_begin_0 = const()[name = string("normed_455_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_455_end_0 = const()[name = string("normed_455_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_455_end_mask_0 = const()[name = string("normed_455_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_455_cast_fp16 = slice_by_index(begin = normed_455_begin_0, end = normed_455_end_0, end_mask = normed_455_end_mask_0, x = normed_453_cast_fp16)[name = string("normed_455_cast_fp16")]; + tensor var_15751_to_fp16 = const()[name = string("op_15751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755341824)))]; + tensor hidden_states_307_cast_fp16 = mul(x = normed_455_cast_fp16, y = var_15751_to_fp16)[name = string("hidden_states_307_cast_fp16")]; + tensor hidden_states_309_cast_fp16 = add(x = hidden_states_303_cast_fp16, y = hidden_states_307_cast_fp16)[name = string("hidden_states_309_cast_fp16")]; + int32 var_15805 = const()[name = string("op_15805"), val = int32(-1)]; + fp16 const_802_promoted_to_fp16 = const()[name = string("const_802_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15807_cast_fp16 = mul(x = hidden_states_309_cast_fp16, y = const_802_promoted_to_fp16)[name = string("op_15807_cast_fp16")]; + bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; + tensor input_381_cast_fp16 = concat(axis = var_15805, interleave = input_381_interleave_0, values = (hidden_states_309_cast_fp16, var_15807_cast_fp16))[name = string("input_381_cast_fp16")]; + tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; + fp16 var_15802_to_fp16 = const()[name = string("op_15802_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_15802_to_fp16, x = input_381_cast_fp16)[name = string("normed_457_cast_fp16")]; + tensor normed_459_begin_0 = const()[name = string("normed_459_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_459_end_0 = const()[name = string("normed_459_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_459_end_mask_0 = const()[name = string("normed_459_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_459_cast_fp16 = slice_by_index(begin = normed_459_begin_0, end = normed_459_end_0, end_mask = normed_459_end_mask_0, x = normed_457_cast_fp16)[name = string("normed_459_cast_fp16")]; + tensor var_15821_to_fp16 = const()[name = string("op_15821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755344192)))]; + tensor hidden_states_311_cast_fp16 = mul(x = normed_459_cast_fp16, y = var_15821_to_fp16)[name = string("hidden_states_311_cast_fp16")]; + tensor var_15832 = const()[name = string("op_15832"), val = tensor([0, 2, 1])]; + tensor var_15835_axes_0 = const()[name = string("op_15835_axes_0"), val = tensor([2])]; + tensor var_15833_cast_fp16 = transpose(perm = var_15832, x = hidden_states_311_cast_fp16)[name = string("transpose_62")]; + tensor var_15835_cast_fp16 = expand_dims(axes = var_15835_axes_0, x = var_15833_cast_fp16)[name = string("op_15835_cast_fp16")]; + string query_states_153_pad_type_0 = const()[name = string("query_states_153_pad_type_0"), val = string("valid")]; + tensor query_states_153_strides_0 = const()[name = string("query_states_153_strides_0"), val = tensor([1, 1])]; + tensor query_states_153_pad_0 = const()[name = string("query_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_153_dilations_0 = const()[name = string("query_states_153_dilations_0"), val = tensor([1, 1])]; + int32 query_states_153_groups_0 = const()[name = string("query_states_153_groups_0"), val = int32(1)]; + tensor query_states_153 = conv(dilations = query_states_153_dilations_0, groups = query_states_153_groups_0, pad = query_states_153_pad_0, pad_type = query_states_153_pad_type_0, strides = query_states_153_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_15835_cast_fp16)[name = string("query_states_153")]; + string key_states_191_pad_type_0 = const()[name = string("key_states_191_pad_type_0"), val = string("valid")]; + tensor key_states_191_strides_0 = const()[name = string("key_states_191_strides_0"), val = tensor([1, 1])]; + tensor key_states_191_pad_0 = const()[name = string("key_states_191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_191_dilations_0 = const()[name = string("key_states_191_dilations_0"), val = tensor([1, 1])]; + int32 key_states_191_groups_0 = const()[name = string("key_states_191_groups_0"), val = int32(1)]; + tensor key_states_191 = conv(dilations = key_states_191_dilations_0, groups = key_states_191_groups_0, pad = key_states_191_pad_0, pad_type = key_states_191_pad_type_0, strides = key_states_191_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_15835_cast_fp16)[name = string("key_states_191")]; + string value_states_153_pad_type_0 = const()[name = string("value_states_153_pad_type_0"), val = string("valid")]; + tensor value_states_153_strides_0 = const()[name = string("value_states_153_strides_0"), val = tensor([1, 1])]; + tensor value_states_153_pad_0 = const()[name = string("value_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_153_dilations_0 = const()[name = string("value_states_153_dilations_0"), val = tensor([1, 1])]; + int32 value_states_153_groups_0 = const()[name = string("value_states_153_groups_0"), val = int32(1)]; + tensor value_states_153 = conv(dilations = value_states_153_dilations_0, groups = value_states_153_groups_0, pad = value_states_153_pad_0, pad_type = value_states_153_pad_type_0, strides = value_states_153_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_15835_cast_fp16)[name = string("value_states_153")]; + tensor var_15877 = const()[name = string("op_15877"), val = tensor([1, 4, 256, 64])]; + tensor var_15878 = reshape(shape = var_15877, x = query_states_153)[name = string("op_15878")]; + tensor var_15883 = const()[name = string("op_15883"), val = tensor([0, 1, 3, 2])]; + tensor var_15888 = const()[name = string("op_15888"), val = tensor([1, 1, 256, 64])]; + tensor var_15889 = reshape(shape = var_15888, x = key_states_191)[name = string("op_15889")]; + tensor var_15894 = const()[name = string("op_15894"), val = tensor([0, 1, 3, 2])]; + tensor var_15899 = const()[name = string("op_15899"), val = tensor([1, 1, 256, 64])]; + tensor var_15900 = reshape(shape = var_15899, x = value_states_153)[name = string("op_15900")]; + tensor var_15905 = const()[name = string("op_15905"), val = tensor([0, 1, 3, 2])]; + int32 var_15916 = const()[name = string("op_15916"), val = int32(-1)]; + fp16 const_807_promoted = const()[name = string("const_807_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_313 = transpose(perm = var_15883, x = var_15878)[name = string("transpose_61")]; + tensor var_15918 = mul(x = hidden_states_313, y = const_807_promoted)[name = string("op_15918")]; + bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; + tensor input_385 = concat(axis = var_15916, interleave = input_385_interleave_0, values = (hidden_states_313, var_15918))[name = string("input_385")]; + tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; + fp16 var_15913_to_fp16 = const()[name = string("op_15913_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_15913_to_fp16, x = input_385)[name = string("normed_461_cast_fp16")]; + tensor normed_463_begin_0 = const()[name = string("normed_463_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_463_end_0 = const()[name = string("normed_463_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_463_end_mask_0 = const()[name = string("normed_463_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_463 = slice_by_index(begin = normed_463_begin_0, end = normed_463_end_0, end_mask = normed_463_end_mask_0, x = normed_461_cast_fp16)[name = string("normed_463")]; + tensor var_15932_to_fp16 = const()[name = string("op_15932_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755346560)))]; + tensor q_39_cast_fp16 = mul(x = normed_463, y = var_15932_to_fp16)[name = string("q_39_cast_fp16")]; + int32 var_15943 = const()[name = string("op_15943"), val = int32(-1)]; + fp16 const_811_promoted = const()[name = string("const_811_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_315 = transpose(perm = var_15894, x = var_15889)[name = string("transpose_60")]; + tensor var_15945 = mul(x = hidden_states_315, y = const_811_promoted)[name = string("op_15945")]; + bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; + tensor input_387 = concat(axis = var_15943, interleave = input_387_interleave_0, values = (hidden_states_315, var_15945))[name = string("input_387")]; + tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; + fp16 var_15940_to_fp16 = const()[name = string("op_15940_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_15940_to_fp16, x = input_387)[name = string("normed_465_cast_fp16")]; + tensor normed_467_begin_0 = const()[name = string("normed_467_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_467_end_0 = const()[name = string("normed_467_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_467_end_mask_0 = const()[name = string("normed_467_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_467 = slice_by_index(begin = normed_467_begin_0, end = normed_467_end_0, end_mask = normed_467_end_mask_0, x = normed_465_cast_fp16)[name = string("normed_467")]; + tensor var_15959_to_fp16 = const()[name = string("op_15959_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755347136)))]; + tensor k_39_cast_fp16 = mul(x = normed_467, y = var_15959_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_15973_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_5)[name = string("op_15973_cast_fp16")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; + fp16 const_817_promoted_to_fp16 = const()[name = string("const_817_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15994_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_817_promoted_to_fp16)[name = string("op_15994_cast_fp16")]; + int32 var_15996 = const()[name = string("op_15996"), val = int32(-1)]; + bool var_15997_interleave_0 = const()[name = string("op_15997_interleave_0"), val = bool(false)]; + tensor var_15997_cast_fp16 = concat(axis = var_15996, interleave = var_15997_interleave_0, values = (var_15994_cast_fp16, x1_77_cast_fp16))[name = string("op_15997_cast_fp16")]; + tensor var_15998_cast_fp16 = mul(x = var_15997_cast_fp16, y = sin_5)[name = string("op_15998_cast_fp16")]; + tensor query_states_155_cast_fp16 = add(x = var_15973_cast_fp16, y = var_15998_cast_fp16)[name = string("query_states_155_cast_fp16")]; + tensor var_16001_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_5)[name = string("op_16001_cast_fp16")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; + fp16 const_820_promoted_to_fp16 = const()[name = string("const_820_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16022_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_820_promoted_to_fp16)[name = string("op_16022_cast_fp16")]; + int32 var_16024 = const()[name = string("op_16024"), val = int32(-1)]; + bool var_16025_interleave_0 = const()[name = string("op_16025_interleave_0"), val = bool(false)]; + tensor var_16025_cast_fp16 = concat(axis = var_16024, interleave = var_16025_interleave_0, values = (var_16022_cast_fp16, x1_79_cast_fp16))[name = string("op_16025_cast_fp16")]; + tensor var_16026_cast_fp16 = mul(x = var_16025_cast_fp16, y = sin_5)[name = string("op_16026_cast_fp16")]; + tensor key_states_193_cast_fp16 = add(x = var_16001_cast_fp16, y = var_16026_cast_fp16)[name = string("key_states_193_cast_fp16")]; + tensor key_slice_33_begin_0 = const()[name = string("key_slice_33_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor key_slice_33_end_0 = const()[name = string("key_slice_33_end_0"), val = tensor([17, 1, 512, 256])]; + tensor key_slice_33_end_mask_0 = const()[name = string("key_slice_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_33_cast_fp16 = slice_by_index(begin = key_slice_33_begin_0, end = key_slice_33_end_0, end_mask = key_slice_33_end_mask_0, x = coreml_update_state_89)[name = string("key_slice_33_cast_fp16")]; + tensor var_16063_begin_0 = const()[name = string("op_16063_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_16063_end_0 = const()[name = string("op_16063_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_16063_end_mask_0 = const()[name = string("op_16063_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_16063_cast_fp16 = slice_by_index(begin = var_16063_begin_0, end = var_16063_end_0, end_mask = var_16063_end_mask_0, x = key_slice_33_cast_fp16)[name = string("op_16063_cast_fp16")]; + int32 var_16090 = const()[name = string("op_16090"), val = int32(2)]; + bool shifted_key_33_interleave_0 = const()[name = string("shifted_key_33_interleave_0"), val = bool(false)]; + tensor shifted_key_33_cast_fp16 = concat(axis = var_16090, interleave = shifted_key_33_interleave_0, values = (var_16063_cast_fp16, key_states_193_cast_fp16))[name = string("shifted_key_33_cast_fp16")]; + tensor concat_266 = const()[name = string("concat_266"), val = tensor([16, 0, 0, 0])]; + tensor concat_267 = const()[name = string("concat_267"), val = tensor([17, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_266, begin_mask = model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0, end = concat_267, end_mask = model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_33_stride_0, update = shifted_key_33_cast_fp16, x = coreml_update_state_89)[name = string("model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_194_write_state")]; + tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_194")]; + tensor value_slice_33_begin_0 = const()[name = string("value_slice_33_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor value_slice_33_end_0 = const()[name = string("value_slice_33_end_0"), val = tensor([39, 1, 512, 256])]; + tensor value_slice_33_end_mask_0 = const()[name = string("value_slice_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_33_cast_fp16 = slice_by_index(begin = value_slice_33_begin_0, end = value_slice_33_end_0, end_mask = value_slice_33_end_mask_0, x = coreml_update_state_90)[name = string("value_slice_33_cast_fp16")]; + tensor var_16133_begin_0 = const()[name = string("op_16133_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_16133_end_0 = const()[name = string("op_16133_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_16133_end_mask_0 = const()[name = string("op_16133_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_16133_cast_fp16 = slice_by_index(begin = var_16133_begin_0, end = var_16133_end_0, end_mask = var_16133_end_mask_0, x = value_slice_33_cast_fp16)[name = string("op_16133_cast_fp16")]; + int32 var_16160 = const()[name = string("op_16160"), val = int32(2)]; + bool shifted_value_33_interleave_0 = const()[name = string("shifted_value_33_interleave_0"), val = bool(false)]; + tensor value_states_155 = transpose(perm = var_15905, x = var_15900)[name = string("transpose_59")]; + tensor shifted_value_33_cast_fp16 = concat(axis = var_16160, interleave = shifted_value_33_interleave_0, values = (var_16133_cast_fp16, value_states_155))[name = string("shifted_value_33_cast_fp16")]; + tensor concat_268 = const()[name = string("concat_268"), val = tensor([38, 0, 0, 0])]; + tensor concat_269 = const()[name = string("concat_269"), val = tensor([39, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_268, begin_mask = model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0, end = concat_269, end_mask = model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_34_stride_0, update = shifted_value_33_cast_fp16, x = coreml_update_state_90)[name = string("model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_195_write_state")]; + tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_195")]; + tensor var_16188_begin_0 = const()[name = string("op_16188_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_16188_end_0 = const()[name = string("op_16188_end_0"), val = tensor([17, 1, 512, 256])]; + tensor var_16188_end_mask_0 = const()[name = string("op_16188_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16188_cast_fp16 = slice_by_index(begin = var_16188_begin_0, end = var_16188_end_0, end_mask = var_16188_end_mask_0, x = coreml_update_state_91)[name = string("op_16188_cast_fp16")]; + tensor var_16195_begin_0 = const()[name = string("op_16195_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor var_16195_end_0 = const()[name = string("op_16195_end_0"), val = tensor([39, 1, 512, 256])]; + tensor var_16195_end_mask_0 = const()[name = string("op_16195_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16195_cast_fp16 = slice_by_index(begin = var_16195_begin_0, end = var_16195_end_0, end_mask = var_16195_end_mask_0, x = coreml_update_state_91)[name = string("op_16195_cast_fp16")]; + tensor var_16234 = const()[name = string("op_16234"), val = tensor([1, 4, 1, 1])]; + tensor x_309_cast_fp16 = tile(reps = var_16234, x = var_16188_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor var_16254 = const()[name = string("op_16254"), val = tensor([1, 4, 1, 1])]; + tensor x_315_cast_fp16 = tile(reps = var_16254, x = var_16195_cast_fp16)[name = string("x_315_cast_fp16")]; + bool var_16281_transpose_x_0 = const()[name = string("op_16281_transpose_x_0"), val = bool(false)]; + bool var_16281_transpose_y_0 = const()[name = string("op_16281_transpose_y_0"), val = bool(true)]; + tensor var_16281 = matmul(transpose_x = var_16281_transpose_x_0, transpose_y = var_16281_transpose_y_0, x = query_states_155_cast_fp16, y = x_309_cast_fp16)[name = string("op_16281")]; + fp16 var_16282_to_fp16 = const()[name = string("op_16282_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_77_cast_fp16 = mul(x = var_16281, y = var_16282_to_fp16)[name = string("attn_weights_77_cast_fp16")]; + tensor attn_weights_79_cast_fp16 = add(x = attn_weights_77_cast_fp16, y = mask_slice_1)[name = string("attn_weights_79_cast_fp16")]; + int32 var_16317 = const()[name = string("op_16317"), val = int32(-1)]; + tensor var_16319_cast_fp16 = softmax(axis = var_16317, x = attn_weights_79_cast_fp16)[name = string("op_16319_cast_fp16")]; + tensor concat_274 = const()[name = string("concat_274"), val = tensor([4, 64, 512])]; + tensor reshape_57_cast_fp16 = reshape(shape = concat_274, x = var_16319_cast_fp16)[name = string("reshape_57_cast_fp16")]; + tensor concat_275 = const()[name = string("concat_275"), val = tensor([4, 512, 256])]; + tensor reshape_58_cast_fp16 = reshape(shape = concat_275, x = x_315_cast_fp16)[name = string("reshape_58_cast_fp16")]; + bool matmul_19_transpose_x_0 = const()[name = string("matmul_19_transpose_x_0"), val = bool(false)]; + bool matmul_19_transpose_y_0 = const()[name = string("matmul_19_transpose_y_0"), val = bool(false)]; + tensor matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_0, transpose_y = matmul_19_transpose_y_0, x = reshape_57_cast_fp16, y = reshape_58_cast_fp16)[name = string("matmul_19_cast_fp16")]; + tensor concat_279 = const()[name = string("concat_279"), val = tensor([1, 4, 64, 256])]; + tensor reshape_59_cast_fp16 = reshape(shape = concat_279, x = matmul_19_cast_fp16)[name = string("reshape_59_cast_fp16")]; + tensor var_16331_perm_0 = const()[name = string("op_16331_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_16350 = const()[name = string("op_16350"), val = tensor([1, 64, 1024])]; + tensor var_16331_cast_fp16 = transpose(perm = var_16331_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_58")]; + tensor attn_output_195_cast_fp16 = reshape(shape = var_16350, x = var_16331_cast_fp16)[name = string("attn_output_195_cast_fp16")]; + tensor var_16355 = const()[name = string("op_16355"), val = tensor([0, 2, 1])]; + string var_16371_pad_type_0 = const()[name = string("op_16371_pad_type_0"), val = string("valid")]; + int32 var_16371_groups_0 = const()[name = string("op_16371_groups_0"), val = int32(1)]; + tensor var_16371_strides_0 = const()[name = string("op_16371_strides_0"), val = tensor([1])]; + tensor var_16371_pad_0 = const()[name = string("op_16371_pad_0"), val = tensor([0, 0])]; + tensor var_16371_dilations_0 = const()[name = string("op_16371_dilations_0"), val = tensor([1])]; + tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755347712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756232512))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_16356_cast_fp16 = transpose(perm = var_16355, x = attn_output_195_cast_fp16)[name = string("transpose_57")]; + tensor var_16371_cast_fp16 = conv(dilations = var_16371_dilations_0, groups = var_16371_groups_0, pad = var_16371_pad_0, pad_type = var_16371_pad_type_0, strides = var_16371_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_16356_cast_fp16)[name = string("op_16371_cast_fp16")]; + tensor var_16375 = const()[name = string("op_16375"), val = tensor([0, 2, 1])]; + int32 var_16386 = const()[name = string("op_16386"), val = int32(-1)]; + fp16 const_831_promoted_to_fp16 = const()[name = string("const_831_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_317_cast_fp16 = transpose(perm = var_16375, x = var_16371_cast_fp16)[name = string("transpose_56")]; + tensor var_16388_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = const_831_promoted_to_fp16)[name = string("op_16388_cast_fp16")]; + bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; + tensor input_391_cast_fp16 = concat(axis = var_16386, interleave = input_391_interleave_0, values = (hidden_states_317_cast_fp16, var_16388_cast_fp16))[name = string("input_391_cast_fp16")]; + tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; + fp16 var_16383_to_fp16 = const()[name = string("op_16383_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_16383_to_fp16, x = input_391_cast_fp16)[name = string("normed_469_cast_fp16")]; + tensor normed_471_begin_0 = const()[name = string("normed_471_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_471_end_0 = const()[name = string("normed_471_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_471_end_mask_0 = const()[name = string("normed_471_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_471_cast_fp16 = slice_by_index(begin = normed_471_begin_0, end = normed_471_end_0, end_mask = normed_471_end_mask_0, x = normed_469_cast_fp16)[name = string("normed_471_cast_fp16")]; + tensor var_16402_to_fp16 = const()[name = string("op_16402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756269440)))]; + tensor attn_output_199_cast_fp16 = mul(x = normed_471_cast_fp16, y = var_16402_to_fp16)[name = string("attn_output_199_cast_fp16")]; + tensor hidden_states_319_cast_fp16 = add(x = hidden_states_309_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; + int32 var_16415 = const()[name = string("op_16415"), val = int32(-1)]; + fp16 const_835_promoted_to_fp16 = const()[name = string("const_835_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16417_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = const_835_promoted_to_fp16)[name = string("op_16417_cast_fp16")]; + bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; + tensor input_393_cast_fp16 = concat(axis = var_16415, interleave = input_393_interleave_0, values = (hidden_states_319_cast_fp16, var_16417_cast_fp16))[name = string("input_393_cast_fp16")]; + tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; + fp16 var_16412_to_fp16 = const()[name = string("op_16412_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_16412_to_fp16, x = input_393_cast_fp16)[name = string("normed_473_cast_fp16")]; + tensor normed_475_begin_0 = const()[name = string("normed_475_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_475_end_0 = const()[name = string("normed_475_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_475_end_mask_0 = const()[name = string("normed_475_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_475_cast_fp16 = slice_by_index(begin = normed_475_begin_0, end = normed_475_end_0, end_mask = normed_475_end_mask_0, x = normed_473_cast_fp16)[name = string("normed_475_cast_fp16")]; + tensor var_16431_to_fp16 = const()[name = string("op_16431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756271808)))]; + tensor x_317_cast_fp16 = mul(x = normed_475_cast_fp16, y = var_16431_to_fp16)[name = string("x_317_cast_fp16")]; + tensor var_16443 = const()[name = string("op_16443"), val = tensor([0, 2, 1])]; + tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; + tensor var_16444_cast_fp16 = transpose(perm = var_16443, x = x_317_cast_fp16)[name = string("transpose_55")]; + tensor input_395_cast_fp16 = expand_dims(axes = input_395_axes_0, x = var_16444_cast_fp16)[name = string("input_395_cast_fp16")]; + string x_319_pad_type_0 = const()[name = string("x_319_pad_type_0"), val = string("valid")]; + tensor x_319_strides_0 = const()[name = string("x_319_strides_0"), val = tensor([1, 1])]; + tensor x_319_pad_0 = const()[name = string("x_319_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_319_dilations_0 = const()[name = string("x_319_dilations_0"), val = tensor([1, 1])]; + int32 x_319_groups_0 = const()[name = string("x_319_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(756274176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(762246208))))[name = string("model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_319_cast_fp16 = conv(dilations = x_319_dilations_0, groups = x_319_groups_0, pad = x_319_pad_0, pad_type = x_319_pad_type_0, strides = x_319_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("x_319_cast_fp16")]; + string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; + tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; + tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; + int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(762467456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768439488))))[name = string("model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_39_cast_fp16 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("b_39_cast_fp16")]; + string var_16469_mode_0 = const()[name = string("op_16469_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_16469_cast_fp16 = gelu(mode = var_16469_mode_0, x = x_319_cast_fp16)[name = string("op_16469_cast_fp16")]; + tensor input_397_cast_fp16 = mul(x = var_16469_cast_fp16, y = b_39_cast_fp16)[name = string("input_397_cast_fp16")]; + string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; + tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; + tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; + int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; + tensor model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768660736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774632768))))[name = string("model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_39_cast_fp16 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("e_39_cast_fp16")]; + tensor var_16477_axes_0 = const()[name = string("op_16477_axes_0"), val = tensor([2])]; + tensor var_16477_cast_fp16 = squeeze(axes = var_16477_axes_0, x = e_39_cast_fp16)[name = string("op_16477_cast_fp16")]; + tensor var_16478 = const()[name = string("op_16478"), val = tensor([0, 2, 1])]; + int32 var_16489 = const()[name = string("op_16489"), val = int32(-1)]; + fp16 const_839_promoted_to_fp16 = const()[name = string("const_839_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_321_cast_fp16 = transpose(perm = var_16478, x = var_16477_cast_fp16)[name = string("transpose_54")]; + tensor var_16491_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = const_839_promoted_to_fp16)[name = string("op_16491_cast_fp16")]; + bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; + tensor input_399_cast_fp16 = concat(axis = var_16489, interleave = input_399_interleave_0, values = (hidden_states_321_cast_fp16, var_16491_cast_fp16))[name = string("input_399_cast_fp16")]; + tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; + fp16 var_16486_to_fp16 = const()[name = string("op_16486_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_16486_to_fp16, x = input_399_cast_fp16)[name = string("normed_477_cast_fp16")]; + tensor normed_479_begin_0 = const()[name = string("normed_479_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_479_end_0 = const()[name = string("normed_479_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_479_end_mask_0 = const()[name = string("normed_479_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_479_cast_fp16 = slice_by_index(begin = normed_479_begin_0, end = normed_479_end_0, end_mask = normed_479_end_mask_0, x = normed_477_cast_fp16)[name = string("normed_479_cast_fp16")]; + tensor var_16505_to_fp16 = const()[name = string("op_16505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774669696)))]; + tensor hidden_states_323_cast_fp16 = mul(x = normed_479_cast_fp16, y = var_16505_to_fp16)[name = string("hidden_states_323_cast_fp16")]; + tensor hidden_states_325_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = hidden_states_323_cast_fp16)[name = string("hidden_states_325_cast_fp16")]; + int32 var_16559 = const()[name = string("op_16559"), val = int32(-1)]; + fp16 const_844_promoted_to_fp16 = const()[name = string("const_844_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16561_cast_fp16 = mul(x = hidden_states_325_cast_fp16, y = const_844_promoted_to_fp16)[name = string("op_16561_cast_fp16")]; + bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; + tensor input_401_cast_fp16 = concat(axis = var_16559, interleave = input_401_interleave_0, values = (hidden_states_325_cast_fp16, var_16561_cast_fp16))[name = string("input_401_cast_fp16")]; + tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; + fp16 var_16556_to_fp16 = const()[name = string("op_16556_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_16556_to_fp16, x = input_401_cast_fp16)[name = string("normed_481_cast_fp16")]; + tensor normed_483_begin_0 = const()[name = string("normed_483_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_483_end_0 = const()[name = string("normed_483_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_483_end_mask_0 = const()[name = string("normed_483_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_483_cast_fp16 = slice_by_index(begin = normed_483_begin_0, end = normed_483_end_0, end_mask = normed_483_end_mask_0, x = normed_481_cast_fp16)[name = string("normed_483_cast_fp16")]; + tensor var_16575_to_fp16 = const()[name = string("op_16575_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774672064)))]; + tensor hidden_states_327_cast_fp16 = mul(x = normed_483_cast_fp16, y = var_16575_to_fp16)[name = string("hidden_states_327_cast_fp16")]; + tensor var_16586 = const()[name = string("op_16586"), val = tensor([0, 2, 1])]; + tensor var_16589_axes_0 = const()[name = string("op_16589_axes_0"), val = tensor([2])]; + tensor var_16587_cast_fp16 = transpose(perm = var_16586, x = hidden_states_327_cast_fp16)[name = string("transpose_53")]; + tensor var_16589_cast_fp16 = expand_dims(axes = var_16589_axes_0, x = var_16587_cast_fp16)[name = string("op_16589_cast_fp16")]; + string query_states_161_pad_type_0 = const()[name = string("query_states_161_pad_type_0"), val = string("valid")]; + tensor query_states_161_strides_0 = const()[name = string("query_states_161_strides_0"), val = tensor([1, 1])]; + tensor query_states_161_pad_0 = const()[name = string("query_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_161_dilations_0 = const()[name = string("query_states_161_dilations_0"), val = tensor([1, 1])]; + int32 query_states_161_groups_0 = const()[name = string("query_states_161_groups_0"), val = int32(1)]; + tensor query_states_161 = conv(dilations = query_states_161_dilations_0, groups = query_states_161_groups_0, pad = query_states_161_pad_0, pad_type = query_states_161_pad_type_0, strides = query_states_161_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_16589_cast_fp16)[name = string("query_states_161")]; + string key_states_201_pad_type_0 = const()[name = string("key_states_201_pad_type_0"), val = string("valid")]; + tensor key_states_201_strides_0 = const()[name = string("key_states_201_strides_0"), val = tensor([1, 1])]; + tensor key_states_201_pad_0 = const()[name = string("key_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_201_dilations_0 = const()[name = string("key_states_201_dilations_0"), val = tensor([1, 1])]; + int32 key_states_201_groups_0 = const()[name = string("key_states_201_groups_0"), val = int32(1)]; + tensor key_states_201 = conv(dilations = key_states_201_dilations_0, groups = key_states_201_groups_0, pad = key_states_201_pad_0, pad_type = key_states_201_pad_type_0, strides = key_states_201_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_16589_cast_fp16)[name = string("key_states_201")]; + string value_states_161_pad_type_0 = const()[name = string("value_states_161_pad_type_0"), val = string("valid")]; + tensor value_states_161_strides_0 = const()[name = string("value_states_161_strides_0"), val = tensor([1, 1])]; + tensor value_states_161_pad_0 = const()[name = string("value_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_161_dilations_0 = const()[name = string("value_states_161_dilations_0"), val = tensor([1, 1])]; + int32 value_states_161_groups_0 = const()[name = string("value_states_161_groups_0"), val = int32(1)]; + tensor value_states_161 = conv(dilations = value_states_161_dilations_0, groups = value_states_161_groups_0, pad = value_states_161_pad_0, pad_type = value_states_161_pad_type_0, strides = value_states_161_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_16589_cast_fp16)[name = string("value_states_161")]; + tensor var_16631 = const()[name = string("op_16631"), val = tensor([1, 4, 256, 64])]; + tensor var_16632 = reshape(shape = var_16631, x = query_states_161)[name = string("op_16632")]; + tensor var_16637 = const()[name = string("op_16637"), val = tensor([0, 1, 3, 2])]; + tensor var_16642 = const()[name = string("op_16642"), val = tensor([1, 1, 256, 64])]; + tensor var_16643 = reshape(shape = var_16642, x = key_states_201)[name = string("op_16643")]; + tensor var_16648 = const()[name = string("op_16648"), val = tensor([0, 1, 3, 2])]; + tensor var_16653 = const()[name = string("op_16653"), val = tensor([1, 1, 256, 64])]; + tensor var_16654 = reshape(shape = var_16653, x = value_states_161)[name = string("op_16654")]; + tensor var_16659 = const()[name = string("op_16659"), val = tensor([0, 1, 3, 2])]; + int32 var_16670 = const()[name = string("op_16670"), val = int32(-1)]; + fp16 const_849_promoted = const()[name = string("const_849_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_329 = transpose(perm = var_16637, x = var_16632)[name = string("transpose_52")]; + tensor var_16672 = mul(x = hidden_states_329, y = const_849_promoted)[name = string("op_16672")]; + bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; + tensor input_405 = concat(axis = var_16670, interleave = input_405_interleave_0, values = (hidden_states_329, var_16672))[name = string("input_405")]; + tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; + fp16 var_16667_to_fp16 = const()[name = string("op_16667_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_16667_to_fp16, x = input_405)[name = string("normed_485_cast_fp16")]; + tensor normed_487_begin_0 = const()[name = string("normed_487_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_487_end_0 = const()[name = string("normed_487_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_487_end_mask_0 = const()[name = string("normed_487_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_487 = slice_by_index(begin = normed_487_begin_0, end = normed_487_end_0, end_mask = normed_487_end_mask_0, x = normed_485_cast_fp16)[name = string("normed_487")]; + tensor var_16686_to_fp16 = const()[name = string("op_16686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774674432)))]; + tensor q_41_cast_fp16 = mul(x = normed_487, y = var_16686_to_fp16)[name = string("q_41_cast_fp16")]; + int32 var_16697 = const()[name = string("op_16697"), val = int32(-1)]; + fp16 const_853_promoted = const()[name = string("const_853_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_331 = transpose(perm = var_16648, x = var_16643)[name = string("transpose_51")]; + tensor var_16699 = mul(x = hidden_states_331, y = const_853_promoted)[name = string("op_16699")]; + bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; + tensor input_407 = concat(axis = var_16697, interleave = input_407_interleave_0, values = (hidden_states_331, var_16699))[name = string("input_407")]; + tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; + fp16 var_16694_to_fp16 = const()[name = string("op_16694_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_16694_to_fp16, x = input_407)[name = string("normed_489_cast_fp16")]; + tensor normed_491_begin_0 = const()[name = string("normed_491_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_491_end_0 = const()[name = string("normed_491_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_491_end_mask_0 = const()[name = string("normed_491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_491 = slice_by_index(begin = normed_491_begin_0, end = normed_491_end_0, end_mask = normed_491_end_mask_0, x = normed_489_cast_fp16)[name = string("normed_491")]; + tensor var_16713_to_fp16 = const()[name = string("op_16713_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774675008)))]; + tensor k_41_cast_fp16 = mul(x = normed_491, y = var_16713_to_fp16)[name = string("k_41_cast_fp16")]; + tensor var_16727_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_5)[name = string("op_16727_cast_fp16")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; + fp16 const_859_promoted_to_fp16 = const()[name = string("const_859_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16748_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_859_promoted_to_fp16)[name = string("op_16748_cast_fp16")]; + int32 var_16750 = const()[name = string("op_16750"), val = int32(-1)]; + bool var_16751_interleave_0 = const()[name = string("op_16751_interleave_0"), val = bool(false)]; + tensor var_16751_cast_fp16 = concat(axis = var_16750, interleave = var_16751_interleave_0, values = (var_16748_cast_fp16, x1_81_cast_fp16))[name = string("op_16751_cast_fp16")]; + tensor var_16752_cast_fp16 = mul(x = var_16751_cast_fp16, y = sin_5)[name = string("op_16752_cast_fp16")]; + tensor query_states_163_cast_fp16 = add(x = var_16727_cast_fp16, y = var_16752_cast_fp16)[name = string("query_states_163_cast_fp16")]; + tensor var_16755_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_5)[name = string("op_16755_cast_fp16")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; + fp16 const_862_promoted_to_fp16 = const()[name = string("const_862_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16776_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_862_promoted_to_fp16)[name = string("op_16776_cast_fp16")]; + int32 var_16778 = const()[name = string("op_16778"), val = int32(-1)]; + bool var_16779_interleave_0 = const()[name = string("op_16779_interleave_0"), val = bool(false)]; + tensor var_16779_cast_fp16 = concat(axis = var_16778, interleave = var_16779_interleave_0, values = (var_16776_cast_fp16, x1_83_cast_fp16))[name = string("op_16779_cast_fp16")]; + tensor var_16780_cast_fp16 = mul(x = var_16779_cast_fp16, y = sin_5)[name = string("op_16780_cast_fp16")]; + tensor key_states_203_cast_fp16 = add(x = var_16755_cast_fp16, y = var_16780_cast_fp16)[name = string("key_states_203_cast_fp16")]; + tensor key_slice_35_begin_0 = const()[name = string("key_slice_35_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor key_slice_35_end_0 = const()[name = string("key_slice_35_end_0"), val = tensor([18, 1, 512, 256])]; + tensor key_slice_35_end_mask_0 = const()[name = string("key_slice_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_35_cast_fp16 = slice_by_index(begin = key_slice_35_begin_0, end = key_slice_35_end_0, end_mask = key_slice_35_end_mask_0, x = coreml_update_state_91)[name = string("key_slice_35_cast_fp16")]; + tensor var_16817_begin_0 = const()[name = string("op_16817_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_16817_end_0 = const()[name = string("op_16817_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_16817_end_mask_0 = const()[name = string("op_16817_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_16817_cast_fp16 = slice_by_index(begin = var_16817_begin_0, end = var_16817_end_0, end_mask = var_16817_end_mask_0, x = key_slice_35_cast_fp16)[name = string("op_16817_cast_fp16")]; + int32 var_16844 = const()[name = string("op_16844"), val = int32(2)]; + bool shifted_key_35_interleave_0 = const()[name = string("shifted_key_35_interleave_0"), val = bool(false)]; + tensor shifted_key_35_cast_fp16 = concat(axis = var_16844, interleave = shifted_key_35_interleave_0, values = (var_16817_cast_fp16, key_states_203_cast_fp16))[name = string("shifted_key_35_cast_fp16")]; + tensor concat_280 = const()[name = string("concat_280"), val = tensor([17, 0, 0, 0])]; + tensor concat_281 = const()[name = string("concat_281"), val = tensor([18, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_280, begin_mask = model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0, end = concat_281, end_mask = model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_35_stride_0, update = shifted_key_35_cast_fp16, x = coreml_update_state_91)[name = string("model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_196_write_state")]; + tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_196")]; + tensor value_slice_35_begin_0 = const()[name = string("value_slice_35_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor value_slice_35_end_0 = const()[name = string("value_slice_35_end_0"), val = tensor([40, 1, 512, 256])]; + tensor value_slice_35_end_mask_0 = const()[name = string("value_slice_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_35_cast_fp16 = slice_by_index(begin = value_slice_35_begin_0, end = value_slice_35_end_0, end_mask = value_slice_35_end_mask_0, x = coreml_update_state_92)[name = string("value_slice_35_cast_fp16")]; + tensor var_16887_begin_0 = const()[name = string("op_16887_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_16887_end_0 = const()[name = string("op_16887_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_16887_end_mask_0 = const()[name = string("op_16887_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_16887_cast_fp16 = slice_by_index(begin = var_16887_begin_0, end = var_16887_end_0, end_mask = var_16887_end_mask_0, x = value_slice_35_cast_fp16)[name = string("op_16887_cast_fp16")]; + int32 var_16914 = const()[name = string("op_16914"), val = int32(2)]; + bool shifted_value_35_interleave_0 = const()[name = string("shifted_value_35_interleave_0"), val = bool(false)]; + tensor value_states_163 = transpose(perm = var_16659, x = var_16654)[name = string("transpose_50")]; + tensor shifted_value_35_cast_fp16 = concat(axis = var_16914, interleave = shifted_value_35_interleave_0, values = (var_16887_cast_fp16, value_states_163))[name = string("shifted_value_35_cast_fp16")]; + tensor concat_282 = const()[name = string("concat_282"), val = tensor([39, 0, 0, 0])]; + tensor concat_283 = const()[name = string("concat_283"), val = tensor([40, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_282, begin_mask = model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0, end = concat_283, end_mask = model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_36_stride_0, update = shifted_value_35_cast_fp16, x = coreml_update_state_92)[name = string("model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_197_write_state")]; + tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_197")]; + tensor var_16942_begin_0 = const()[name = string("op_16942_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_16942_end_0 = const()[name = string("op_16942_end_0"), val = tensor([18, 1, 512, 256])]; + tensor var_16942_end_mask_0 = const()[name = string("op_16942_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16942_cast_fp16 = slice_by_index(begin = var_16942_begin_0, end = var_16942_end_0, end_mask = var_16942_end_mask_0, x = coreml_update_state_93)[name = string("op_16942_cast_fp16")]; + tensor var_16949_begin_0 = const()[name = string("op_16949_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor var_16949_end_0 = const()[name = string("op_16949_end_0"), val = tensor([40, 1, 512, 256])]; + tensor var_16949_end_mask_0 = const()[name = string("op_16949_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16949_cast_fp16 = slice_by_index(begin = var_16949_begin_0, end = var_16949_end_0, end_mask = var_16949_end_mask_0, x = coreml_update_state_93)[name = string("op_16949_cast_fp16")]; + tensor var_16988 = const()[name = string("op_16988"), val = tensor([1, 4, 1, 1])]; + tensor x_325_cast_fp16 = tile(reps = var_16988, x = var_16942_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_17008 = const()[name = string("op_17008"), val = tensor([1, 4, 1, 1])]; + tensor x_331_cast_fp16 = tile(reps = var_17008, x = var_16949_cast_fp16)[name = string("x_331_cast_fp16")]; + bool var_17035_transpose_x_0 = const()[name = string("op_17035_transpose_x_0"), val = bool(false)]; + bool var_17035_transpose_y_0 = const()[name = string("op_17035_transpose_y_0"), val = bool(true)]; + tensor var_17035 = matmul(transpose_x = var_17035_transpose_x_0, transpose_y = var_17035_transpose_y_0, x = query_states_163_cast_fp16, y = x_325_cast_fp16)[name = string("op_17035")]; + fp16 var_17036_to_fp16 = const()[name = string("op_17036_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_81_cast_fp16 = mul(x = var_17035, y = var_17036_to_fp16)[name = string("attn_weights_81_cast_fp16")]; + tensor attn_weights_83_cast_fp16 = add(x = attn_weights_81_cast_fp16, y = mask_slice_1)[name = string("attn_weights_83_cast_fp16")]; + int32 var_17071 = const()[name = string("op_17071"), val = int32(-1)]; + tensor var_17073_cast_fp16 = softmax(axis = var_17071, x = attn_weights_83_cast_fp16)[name = string("op_17073_cast_fp16")]; + tensor concat_288 = const()[name = string("concat_288"), val = tensor([4, 64, 512])]; + tensor reshape_60_cast_fp16 = reshape(shape = concat_288, x = var_17073_cast_fp16)[name = string("reshape_60_cast_fp16")]; + tensor concat_289 = const()[name = string("concat_289"), val = tensor([4, 512, 256])]; + tensor reshape_61_cast_fp16 = reshape(shape = concat_289, x = x_331_cast_fp16)[name = string("reshape_61_cast_fp16")]; + bool matmul_20_transpose_x_0 = const()[name = string("matmul_20_transpose_x_0"), val = bool(false)]; + bool matmul_20_transpose_y_0 = const()[name = string("matmul_20_transpose_y_0"), val = bool(false)]; + tensor matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_0, transpose_y = matmul_20_transpose_y_0, x = reshape_60_cast_fp16, y = reshape_61_cast_fp16)[name = string("matmul_20_cast_fp16")]; + tensor concat_293 = const()[name = string("concat_293"), val = tensor([1, 4, 64, 256])]; + tensor reshape_62_cast_fp16 = reshape(shape = concat_293, x = matmul_20_cast_fp16)[name = string("reshape_62_cast_fp16")]; + tensor var_17085_perm_0 = const()[name = string("op_17085_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_17104 = const()[name = string("op_17104"), val = tensor([1, 64, 1024])]; + tensor var_17085_cast_fp16 = transpose(perm = var_17085_perm_0, x = reshape_62_cast_fp16)[name = string("transpose_49")]; + tensor attn_output_205_cast_fp16 = reshape(shape = var_17104, x = var_17085_cast_fp16)[name = string("attn_output_205_cast_fp16")]; + tensor var_17109 = const()[name = string("op_17109"), val = tensor([0, 2, 1])]; + string var_17125_pad_type_0 = const()[name = string("op_17125_pad_type_0"), val = string("valid")]; + int32 var_17125_groups_0 = const()[name = string("op_17125_groups_0"), val = int32(1)]; + tensor var_17125_strides_0 = const()[name = string("op_17125_strides_0"), val = tensor([1])]; + tensor var_17125_pad_0 = const()[name = string("op_17125_pad_0"), val = tensor([0, 0])]; + tensor var_17125_dilations_0 = const()[name = string("op_17125_dilations_0"), val = tensor([1])]; + tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774675584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775560384))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_17110_cast_fp16 = transpose(perm = var_17109, x = attn_output_205_cast_fp16)[name = string("transpose_48")]; + tensor var_17125_cast_fp16 = conv(dilations = var_17125_dilations_0, groups = var_17125_groups_0, pad = var_17125_pad_0, pad_type = var_17125_pad_type_0, strides = var_17125_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_17110_cast_fp16)[name = string("op_17125_cast_fp16")]; + tensor var_17129 = const()[name = string("op_17129"), val = tensor([0, 2, 1])]; + int32 var_17140 = const()[name = string("op_17140"), val = int32(-1)]; + fp16 const_873_promoted_to_fp16 = const()[name = string("const_873_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_333_cast_fp16 = transpose(perm = var_17129, x = var_17125_cast_fp16)[name = string("transpose_47")]; + tensor var_17142_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = const_873_promoted_to_fp16)[name = string("op_17142_cast_fp16")]; + bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; + tensor input_411_cast_fp16 = concat(axis = var_17140, interleave = input_411_interleave_0, values = (hidden_states_333_cast_fp16, var_17142_cast_fp16))[name = string("input_411_cast_fp16")]; + tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; + fp16 var_17137_to_fp16 = const()[name = string("op_17137_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_17137_to_fp16, x = input_411_cast_fp16)[name = string("normed_493_cast_fp16")]; + tensor normed_495_begin_0 = const()[name = string("normed_495_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_495_end_0 = const()[name = string("normed_495_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_495_end_mask_0 = const()[name = string("normed_495_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_495_cast_fp16 = slice_by_index(begin = normed_495_begin_0, end = normed_495_end_0, end_mask = normed_495_end_mask_0, x = normed_493_cast_fp16)[name = string("normed_495_cast_fp16")]; + tensor var_17156_to_fp16 = const()[name = string("op_17156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775597312)))]; + tensor attn_output_209_cast_fp16 = mul(x = normed_495_cast_fp16, y = var_17156_to_fp16)[name = string("attn_output_209_cast_fp16")]; + tensor hidden_states_335_cast_fp16 = add(x = hidden_states_325_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_335_cast_fp16")]; + int32 var_17169 = const()[name = string("op_17169"), val = int32(-1)]; + fp16 const_877_promoted_to_fp16 = const()[name = string("const_877_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17171_cast_fp16 = mul(x = hidden_states_335_cast_fp16, y = const_877_promoted_to_fp16)[name = string("op_17171_cast_fp16")]; + bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; + tensor input_413_cast_fp16 = concat(axis = var_17169, interleave = input_413_interleave_0, values = (hidden_states_335_cast_fp16, var_17171_cast_fp16))[name = string("input_413_cast_fp16")]; + tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; + fp16 var_17166_to_fp16 = const()[name = string("op_17166_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_17166_to_fp16, x = input_413_cast_fp16)[name = string("normed_497_cast_fp16")]; + tensor normed_499_begin_0 = const()[name = string("normed_499_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_499_end_0 = const()[name = string("normed_499_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_499_end_mask_0 = const()[name = string("normed_499_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_499_cast_fp16 = slice_by_index(begin = normed_499_begin_0, end = normed_499_end_0, end_mask = normed_499_end_mask_0, x = normed_497_cast_fp16)[name = string("normed_499_cast_fp16")]; + tensor var_17185_to_fp16 = const()[name = string("op_17185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775599680)))]; + tensor x_333_cast_fp16 = mul(x = normed_499_cast_fp16, y = var_17185_to_fp16)[name = string("x_333_cast_fp16")]; + tensor var_17197 = const()[name = string("op_17197"), val = tensor([0, 2, 1])]; + tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; + tensor var_17198_cast_fp16 = transpose(perm = var_17197, x = x_333_cast_fp16)[name = string("transpose_46")]; + tensor input_415_cast_fp16 = expand_dims(axes = input_415_axes_0, x = var_17198_cast_fp16)[name = string("input_415_cast_fp16")]; + string x_335_pad_type_0 = const()[name = string("x_335_pad_type_0"), val = string("valid")]; + tensor x_335_strides_0 = const()[name = string("x_335_strides_0"), val = tensor([1, 1])]; + tensor x_335_pad_0 = const()[name = string("x_335_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_335_dilations_0 = const()[name = string("x_335_dilations_0"), val = tensor([1, 1])]; + int32 x_335_groups_0 = const()[name = string("x_335_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775602048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781574080))))[name = string("model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_335_cast_fp16 = conv(dilations = x_335_dilations_0, groups = x_335_groups_0, pad = x_335_pad_0, pad_type = x_335_pad_type_0, strides = x_335_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("x_335_cast_fp16")]; + string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; + tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; + tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; + int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781795328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787767360))))[name = string("model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_41_cast_fp16 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("b_41_cast_fp16")]; + string var_17223_mode_0 = const()[name = string("op_17223_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_17223_cast_fp16 = gelu(mode = var_17223_mode_0, x = x_335_cast_fp16)[name = string("op_17223_cast_fp16")]; + tensor input_417_cast_fp16 = mul(x = var_17223_cast_fp16, y = b_41_cast_fp16)[name = string("input_417_cast_fp16")]; + string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; + tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; + tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; + int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; + tensor model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787988608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793960640))))[name = string("model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_41_cast_fp16 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_417_cast_fp16)[name = string("e_41_cast_fp16")]; + tensor var_17231_axes_0 = const()[name = string("op_17231_axes_0"), val = tensor([2])]; + tensor var_17231_cast_fp16 = squeeze(axes = var_17231_axes_0, x = e_41_cast_fp16)[name = string("op_17231_cast_fp16")]; + tensor var_17232 = const()[name = string("op_17232"), val = tensor([0, 2, 1])]; + int32 var_17243 = const()[name = string("op_17243"), val = int32(-1)]; + fp16 const_881_promoted_to_fp16 = const()[name = string("const_881_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_337_cast_fp16 = transpose(perm = var_17232, x = var_17231_cast_fp16)[name = string("transpose_45")]; + tensor var_17245_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = const_881_promoted_to_fp16)[name = string("op_17245_cast_fp16")]; + bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; + tensor input_419_cast_fp16 = concat(axis = var_17243, interleave = input_419_interleave_0, values = (hidden_states_337_cast_fp16, var_17245_cast_fp16))[name = string("input_419_cast_fp16")]; + tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; + fp16 var_17240_to_fp16 = const()[name = string("op_17240_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_17240_to_fp16, x = input_419_cast_fp16)[name = string("normed_501_cast_fp16")]; + tensor normed_503_begin_0 = const()[name = string("normed_503_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_503_end_0 = const()[name = string("normed_503_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_503_end_mask_0 = const()[name = string("normed_503_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_503_cast_fp16 = slice_by_index(begin = normed_503_begin_0, end = normed_503_end_0, end_mask = normed_503_end_mask_0, x = normed_501_cast_fp16)[name = string("normed_503_cast_fp16")]; + tensor var_17259_to_fp16 = const()[name = string("op_17259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793997568)))]; + tensor hidden_states_339_cast_fp16 = mul(x = normed_503_cast_fp16, y = var_17259_to_fp16)[name = string("hidden_states_339_cast_fp16")]; + tensor hidden_states_341_cast_fp16 = add(x = hidden_states_335_cast_fp16, y = hidden_states_339_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + int32 var_17313 = const()[name = string("op_17313"), val = int32(-1)]; + fp16 const_886_promoted_to_fp16 = const()[name = string("const_886_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17315_cast_fp16 = mul(x = hidden_states_341_cast_fp16, y = const_886_promoted_to_fp16)[name = string("op_17315_cast_fp16")]; + bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; + tensor input_421_cast_fp16 = concat(axis = var_17313, interleave = input_421_interleave_0, values = (hidden_states_341_cast_fp16, var_17315_cast_fp16))[name = string("input_421_cast_fp16")]; + tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; + fp16 var_17310_to_fp16 = const()[name = string("op_17310_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_17310_to_fp16, x = input_421_cast_fp16)[name = string("normed_505_cast_fp16")]; + tensor normed_507_begin_0 = const()[name = string("normed_507_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_507_end_0 = const()[name = string("normed_507_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_507_end_mask_0 = const()[name = string("normed_507_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_507_cast_fp16 = slice_by_index(begin = normed_507_begin_0, end = normed_507_end_0, end_mask = normed_507_end_mask_0, x = normed_505_cast_fp16)[name = string("normed_507_cast_fp16")]; + tensor var_17329_to_fp16 = const()[name = string("op_17329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793999936)))]; + tensor hidden_states_343_cast_fp16 = mul(x = normed_507_cast_fp16, y = var_17329_to_fp16)[name = string("hidden_states_343_cast_fp16")]; + tensor var_17340 = const()[name = string("op_17340"), val = tensor([0, 2, 1])]; + tensor var_17343_axes_0 = const()[name = string("op_17343_axes_0"), val = tensor([2])]; + tensor var_17341_cast_fp16 = transpose(perm = var_17340, x = hidden_states_343_cast_fp16)[name = string("transpose_44")]; + tensor var_17343_cast_fp16 = expand_dims(axes = var_17343_axes_0, x = var_17341_cast_fp16)[name = string("op_17343_cast_fp16")]; + string query_states_169_pad_type_0 = const()[name = string("query_states_169_pad_type_0"), val = string("valid")]; + tensor query_states_169_strides_0 = const()[name = string("query_states_169_strides_0"), val = tensor([1, 1])]; + tensor query_states_169_pad_0 = const()[name = string("query_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_169_dilations_0 = const()[name = string("query_states_169_dilations_0"), val = tensor([1, 1])]; + int32 query_states_169_groups_0 = const()[name = string("query_states_169_groups_0"), val = int32(1)]; + tensor query_states_169 = conv(dilations = query_states_169_dilations_0, groups = query_states_169_groups_0, pad = query_states_169_pad_0, pad_type = query_states_169_pad_type_0, strides = query_states_169_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_17343_cast_fp16)[name = string("query_states_169")]; + string key_states_211_pad_type_0 = const()[name = string("key_states_211_pad_type_0"), val = string("valid")]; + tensor key_states_211_strides_0 = const()[name = string("key_states_211_strides_0"), val = tensor([1, 1])]; + tensor key_states_211_pad_0 = const()[name = string("key_states_211_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_211_dilations_0 = const()[name = string("key_states_211_dilations_0"), val = tensor([1, 1])]; + int32 key_states_211_groups_0 = const()[name = string("key_states_211_groups_0"), val = int32(1)]; + tensor key_states_211 = conv(dilations = key_states_211_dilations_0, groups = key_states_211_groups_0, pad = key_states_211_pad_0, pad_type = key_states_211_pad_type_0, strides = key_states_211_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_17343_cast_fp16)[name = string("key_states_211")]; + string value_states_169_pad_type_0 = const()[name = string("value_states_169_pad_type_0"), val = string("valid")]; + tensor value_states_169_strides_0 = const()[name = string("value_states_169_strides_0"), val = tensor([1, 1])]; + tensor value_states_169_pad_0 = const()[name = string("value_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_169_dilations_0 = const()[name = string("value_states_169_dilations_0"), val = tensor([1, 1])]; + int32 value_states_169_groups_0 = const()[name = string("value_states_169_groups_0"), val = int32(1)]; + tensor value_states_169 = conv(dilations = value_states_169_dilations_0, groups = value_states_169_groups_0, pad = value_states_169_pad_0, pad_type = value_states_169_pad_type_0, strides = value_states_169_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_17343_cast_fp16)[name = string("value_states_169")]; + tensor var_17385 = const()[name = string("op_17385"), val = tensor([1, 4, 256, 64])]; + tensor var_17386 = reshape(shape = var_17385, x = query_states_169)[name = string("op_17386")]; + tensor var_17391 = const()[name = string("op_17391"), val = tensor([0, 1, 3, 2])]; + tensor var_17396 = const()[name = string("op_17396"), val = tensor([1, 1, 256, 64])]; + tensor var_17397 = reshape(shape = var_17396, x = key_states_211)[name = string("op_17397")]; + tensor var_17402 = const()[name = string("op_17402"), val = tensor([0, 1, 3, 2])]; + tensor var_17407 = const()[name = string("op_17407"), val = tensor([1, 1, 256, 64])]; + tensor var_17408 = reshape(shape = var_17407, x = value_states_169)[name = string("op_17408")]; + tensor var_17413 = const()[name = string("op_17413"), val = tensor([0, 1, 3, 2])]; + int32 var_17424 = const()[name = string("op_17424"), val = int32(-1)]; + fp16 const_891_promoted = const()[name = string("const_891_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_345 = transpose(perm = var_17391, x = var_17386)[name = string("transpose_43")]; + tensor var_17426 = mul(x = hidden_states_345, y = const_891_promoted)[name = string("op_17426")]; + bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; + tensor input_425 = concat(axis = var_17424, interleave = input_425_interleave_0, values = (hidden_states_345, var_17426))[name = string("input_425")]; + tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; + fp16 var_17421_to_fp16 = const()[name = string("op_17421_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_17421_to_fp16, x = input_425)[name = string("normed_509_cast_fp16")]; + tensor normed_511_begin_0 = const()[name = string("normed_511_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_511_end_0 = const()[name = string("normed_511_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_511_end_mask_0 = const()[name = string("normed_511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_511 = slice_by_index(begin = normed_511_begin_0, end = normed_511_end_0, end_mask = normed_511_end_mask_0, x = normed_509_cast_fp16)[name = string("normed_511")]; + tensor var_17440_to_fp16 = const()[name = string("op_17440_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794002304)))]; + tensor q_43_cast_fp16 = mul(x = normed_511, y = var_17440_to_fp16)[name = string("q_43_cast_fp16")]; + int32 var_17451 = const()[name = string("op_17451"), val = int32(-1)]; + fp16 const_895_promoted = const()[name = string("const_895_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_347 = transpose(perm = var_17402, x = var_17397)[name = string("transpose_42")]; + tensor var_17453 = mul(x = hidden_states_347, y = const_895_promoted)[name = string("op_17453")]; + bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; + tensor input_427 = concat(axis = var_17451, interleave = input_427_interleave_0, values = (hidden_states_347, var_17453))[name = string("input_427")]; + tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; + fp16 var_17448_to_fp16 = const()[name = string("op_17448_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_17448_to_fp16, x = input_427)[name = string("normed_513_cast_fp16")]; + tensor normed_515_begin_0 = const()[name = string("normed_515_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_515_end_0 = const()[name = string("normed_515_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_515_end_mask_0 = const()[name = string("normed_515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_515 = slice_by_index(begin = normed_515_begin_0, end = normed_515_end_0, end_mask = normed_515_end_mask_0, x = normed_513_cast_fp16)[name = string("normed_515")]; + tensor var_17467_to_fp16 = const()[name = string("op_17467_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794002880)))]; + tensor k_43_cast_fp16 = mul(x = normed_515, y = var_17467_to_fp16)[name = string("k_43_cast_fp16")]; + tensor var_17481_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_5)[name = string("op_17481_cast_fp16")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; + fp16 const_901_promoted_to_fp16 = const()[name = string("const_901_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17502_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_901_promoted_to_fp16)[name = string("op_17502_cast_fp16")]; + int32 var_17504 = const()[name = string("op_17504"), val = int32(-1)]; + bool var_17505_interleave_0 = const()[name = string("op_17505_interleave_0"), val = bool(false)]; + tensor var_17505_cast_fp16 = concat(axis = var_17504, interleave = var_17505_interleave_0, values = (var_17502_cast_fp16, x1_85_cast_fp16))[name = string("op_17505_cast_fp16")]; + tensor var_17506_cast_fp16 = mul(x = var_17505_cast_fp16, y = sin_5)[name = string("op_17506_cast_fp16")]; + tensor query_states_171_cast_fp16 = add(x = var_17481_cast_fp16, y = var_17506_cast_fp16)[name = string("query_states_171_cast_fp16")]; + tensor var_17509_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_5)[name = string("op_17509_cast_fp16")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; + fp16 const_904_promoted_to_fp16 = const()[name = string("const_904_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17530_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_904_promoted_to_fp16)[name = string("op_17530_cast_fp16")]; + int32 var_17532 = const()[name = string("op_17532"), val = int32(-1)]; + bool var_17533_interleave_0 = const()[name = string("op_17533_interleave_0"), val = bool(false)]; + tensor var_17533_cast_fp16 = concat(axis = var_17532, interleave = var_17533_interleave_0, values = (var_17530_cast_fp16, x1_87_cast_fp16))[name = string("op_17533_cast_fp16")]; + tensor var_17534_cast_fp16 = mul(x = var_17533_cast_fp16, y = sin_5)[name = string("op_17534_cast_fp16")]; + tensor key_states_213_cast_fp16 = add(x = var_17509_cast_fp16, y = var_17534_cast_fp16)[name = string("key_states_213_cast_fp16")]; + tensor key_slice_37_begin_0 = const()[name = string("key_slice_37_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor key_slice_37_end_0 = const()[name = string("key_slice_37_end_0"), val = tensor([19, 1, 512, 256])]; + tensor key_slice_37_end_mask_0 = const()[name = string("key_slice_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_37_cast_fp16 = slice_by_index(begin = key_slice_37_begin_0, end = key_slice_37_end_0, end_mask = key_slice_37_end_mask_0, x = coreml_update_state_93)[name = string("key_slice_37_cast_fp16")]; + tensor var_17571_begin_0 = const()[name = string("op_17571_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_17571_end_0 = const()[name = string("op_17571_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_17571_end_mask_0 = const()[name = string("op_17571_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_17571_cast_fp16 = slice_by_index(begin = var_17571_begin_0, end = var_17571_end_0, end_mask = var_17571_end_mask_0, x = key_slice_37_cast_fp16)[name = string("op_17571_cast_fp16")]; + int32 var_17598 = const()[name = string("op_17598"), val = int32(2)]; + bool shifted_key_37_interleave_0 = const()[name = string("shifted_key_37_interleave_0"), val = bool(false)]; + tensor shifted_key_37_cast_fp16 = concat(axis = var_17598, interleave = shifted_key_37_interleave_0, values = (var_17571_cast_fp16, key_states_213_cast_fp16))[name = string("shifted_key_37_cast_fp16")]; + tensor concat_294 = const()[name = string("concat_294"), val = tensor([18, 0, 0, 0])]; + tensor concat_295 = const()[name = string("concat_295"), val = tensor([19, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_294, begin_mask = model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0, end = concat_295, end_mask = model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_37_stride_0, update = shifted_key_37_cast_fp16, x = coreml_update_state_93)[name = string("model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_198_write_state")]; + tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_198")]; + tensor value_slice_37_begin_0 = const()[name = string("value_slice_37_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor value_slice_37_end_0 = const()[name = string("value_slice_37_end_0"), val = tensor([41, 1, 512, 256])]; + tensor value_slice_37_end_mask_0 = const()[name = string("value_slice_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_37_cast_fp16 = slice_by_index(begin = value_slice_37_begin_0, end = value_slice_37_end_0, end_mask = value_slice_37_end_mask_0, x = coreml_update_state_94)[name = string("value_slice_37_cast_fp16")]; + tensor var_17641_begin_0 = const()[name = string("op_17641_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_17641_end_0 = const()[name = string("op_17641_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_17641_end_mask_0 = const()[name = string("op_17641_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_17641_cast_fp16 = slice_by_index(begin = var_17641_begin_0, end = var_17641_end_0, end_mask = var_17641_end_mask_0, x = value_slice_37_cast_fp16)[name = string("op_17641_cast_fp16")]; + int32 var_17668 = const()[name = string("op_17668"), val = int32(2)]; + bool shifted_value_37_interleave_0 = const()[name = string("shifted_value_37_interleave_0"), val = bool(false)]; + tensor value_states_171 = transpose(perm = var_17413, x = var_17408)[name = string("transpose_41")]; + tensor shifted_value_37_cast_fp16 = concat(axis = var_17668, interleave = shifted_value_37_interleave_0, values = (var_17641_cast_fp16, value_states_171))[name = string("shifted_value_37_cast_fp16")]; + tensor concat_296 = const()[name = string("concat_296"), val = tensor([40, 0, 0, 0])]; + tensor concat_297 = const()[name = string("concat_297"), val = tensor([41, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_296, begin_mask = model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0, end = concat_297, end_mask = model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_38_stride_0, update = shifted_value_37_cast_fp16, x = coreml_update_state_94)[name = string("model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_199_write_state")]; + tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_199")]; + tensor var_17696_begin_0 = const()[name = string("op_17696_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_17696_end_0 = const()[name = string("op_17696_end_0"), val = tensor([19, 1, 512, 256])]; + tensor var_17696_end_mask_0 = const()[name = string("op_17696_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_17696_cast_fp16 = slice_by_index(begin = var_17696_begin_0, end = var_17696_end_0, end_mask = var_17696_end_mask_0, x = coreml_update_state_95)[name = string("op_17696_cast_fp16")]; + tensor var_17703_begin_0 = const()[name = string("op_17703_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor var_17703_end_0 = const()[name = string("op_17703_end_0"), val = tensor([41, 1, 512, 256])]; + tensor var_17703_end_mask_0 = const()[name = string("op_17703_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_17703_cast_fp16 = slice_by_index(begin = var_17703_begin_0, end = var_17703_end_0, end_mask = var_17703_end_mask_0, x = coreml_update_state_95)[name = string("op_17703_cast_fp16")]; + tensor var_17742 = const()[name = string("op_17742"), val = tensor([1, 4, 1, 1])]; + tensor x_341_cast_fp16 = tile(reps = var_17742, x = var_17696_cast_fp16)[name = string("x_341_cast_fp16")]; + tensor var_17762 = const()[name = string("op_17762"), val = tensor([1, 4, 1, 1])]; + tensor x_347_cast_fp16 = tile(reps = var_17762, x = var_17703_cast_fp16)[name = string("x_347_cast_fp16")]; + bool var_17789_transpose_x_0 = const()[name = string("op_17789_transpose_x_0"), val = bool(false)]; + bool var_17789_transpose_y_0 = const()[name = string("op_17789_transpose_y_0"), val = bool(true)]; + tensor var_17789 = matmul(transpose_x = var_17789_transpose_x_0, transpose_y = var_17789_transpose_y_0, x = query_states_171_cast_fp16, y = x_341_cast_fp16)[name = string("op_17789")]; + fp16 var_17790_to_fp16 = const()[name = string("op_17790_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_85_cast_fp16 = mul(x = var_17789, y = var_17790_to_fp16)[name = string("attn_weights_85_cast_fp16")]; + tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = mask_slice_1)[name = string("attn_weights_87_cast_fp16")]; + int32 var_17825 = const()[name = string("op_17825"), val = int32(-1)]; + tensor var_17827_cast_fp16 = softmax(axis = var_17825, x = attn_weights_87_cast_fp16)[name = string("op_17827_cast_fp16")]; + tensor concat_302 = const()[name = string("concat_302"), val = tensor([4, 64, 512])]; + tensor reshape_63_cast_fp16 = reshape(shape = concat_302, x = var_17827_cast_fp16)[name = string("reshape_63_cast_fp16")]; + tensor concat_303 = const()[name = string("concat_303"), val = tensor([4, 512, 256])]; + tensor reshape_64_cast_fp16 = reshape(shape = concat_303, x = x_347_cast_fp16)[name = string("reshape_64_cast_fp16")]; + bool matmul_21_transpose_x_0 = const()[name = string("matmul_21_transpose_x_0"), val = bool(false)]; + bool matmul_21_transpose_y_0 = const()[name = string("matmul_21_transpose_y_0"), val = bool(false)]; + tensor matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_0, transpose_y = matmul_21_transpose_y_0, x = reshape_63_cast_fp16, y = reshape_64_cast_fp16)[name = string("matmul_21_cast_fp16")]; + tensor concat_307 = const()[name = string("concat_307"), val = tensor([1, 4, 64, 256])]; + tensor reshape_65_cast_fp16 = reshape(shape = concat_307, x = matmul_21_cast_fp16)[name = string("reshape_65_cast_fp16")]; + tensor var_17839_perm_0 = const()[name = string("op_17839_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_17858 = const()[name = string("op_17858"), val = tensor([1, 64, 1024])]; + tensor var_17839_cast_fp16 = transpose(perm = var_17839_perm_0, x = reshape_65_cast_fp16)[name = string("transpose_40")]; + tensor attn_output_215_cast_fp16 = reshape(shape = var_17858, x = var_17839_cast_fp16)[name = string("attn_output_215_cast_fp16")]; + tensor var_17863 = const()[name = string("op_17863"), val = tensor([0, 2, 1])]; + string var_17879_pad_type_0 = const()[name = string("op_17879_pad_type_0"), val = string("valid")]; + int32 var_17879_groups_0 = const()[name = string("op_17879_groups_0"), val = int32(1)]; + tensor var_17879_strides_0 = const()[name = string("op_17879_strides_0"), val = tensor([1])]; + tensor var_17879_pad_0 = const()[name = string("op_17879_pad_0"), val = tensor([0, 0])]; + tensor var_17879_dilations_0 = const()[name = string("op_17879_dilations_0"), val = tensor([1])]; + tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794003456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794888256))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_17864_cast_fp16 = transpose(perm = var_17863, x = attn_output_215_cast_fp16)[name = string("transpose_39")]; + tensor var_17879_cast_fp16 = conv(dilations = var_17879_dilations_0, groups = var_17879_groups_0, pad = var_17879_pad_0, pad_type = var_17879_pad_type_0, strides = var_17879_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_17864_cast_fp16)[name = string("op_17879_cast_fp16")]; + tensor var_17883 = const()[name = string("op_17883"), val = tensor([0, 2, 1])]; + int32 var_17894 = const()[name = string("op_17894"), val = int32(-1)]; + fp16 const_915_promoted_to_fp16 = const()[name = string("const_915_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_349_cast_fp16 = transpose(perm = var_17883, x = var_17879_cast_fp16)[name = string("transpose_38")]; + tensor var_17896_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = const_915_promoted_to_fp16)[name = string("op_17896_cast_fp16")]; + bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; + tensor input_431_cast_fp16 = concat(axis = var_17894, interleave = input_431_interleave_0, values = (hidden_states_349_cast_fp16, var_17896_cast_fp16))[name = string("input_431_cast_fp16")]; + tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; + fp16 var_17891_to_fp16 = const()[name = string("op_17891_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_17891_to_fp16, x = input_431_cast_fp16)[name = string("normed_517_cast_fp16")]; + tensor normed_519_begin_0 = const()[name = string("normed_519_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_519_end_0 = const()[name = string("normed_519_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_519_end_mask_0 = const()[name = string("normed_519_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_519_cast_fp16 = slice_by_index(begin = normed_519_begin_0, end = normed_519_end_0, end_mask = normed_519_end_mask_0, x = normed_517_cast_fp16)[name = string("normed_519_cast_fp16")]; + tensor var_17910_to_fp16 = const()[name = string("op_17910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794925184)))]; + tensor attn_output_219_cast_fp16 = mul(x = normed_519_cast_fp16, y = var_17910_to_fp16)[name = string("attn_output_219_cast_fp16")]; + tensor hidden_states_351_cast_fp16 = add(x = hidden_states_341_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_351_cast_fp16")]; + int32 var_17923 = const()[name = string("op_17923"), val = int32(-1)]; + fp16 const_919_promoted_to_fp16 = const()[name = string("const_919_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_17925_cast_fp16 = mul(x = hidden_states_351_cast_fp16, y = const_919_promoted_to_fp16)[name = string("op_17925_cast_fp16")]; + bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; + tensor input_433_cast_fp16 = concat(axis = var_17923, interleave = input_433_interleave_0, values = (hidden_states_351_cast_fp16, var_17925_cast_fp16))[name = string("input_433_cast_fp16")]; + tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; + fp16 var_17920_to_fp16 = const()[name = string("op_17920_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_17920_to_fp16, x = input_433_cast_fp16)[name = string("normed_521_cast_fp16")]; + tensor normed_523_begin_0 = const()[name = string("normed_523_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_523_end_0 = const()[name = string("normed_523_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_523_end_mask_0 = const()[name = string("normed_523_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_523_cast_fp16 = slice_by_index(begin = normed_523_begin_0, end = normed_523_end_0, end_mask = normed_523_end_mask_0, x = normed_521_cast_fp16)[name = string("normed_523_cast_fp16")]; + tensor var_17939_to_fp16 = const()[name = string("op_17939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794927552)))]; + tensor x_349_cast_fp16 = mul(x = normed_523_cast_fp16, y = var_17939_to_fp16)[name = string("x_349_cast_fp16")]; + tensor var_17951 = const()[name = string("op_17951"), val = tensor([0, 2, 1])]; + tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; + tensor var_17952_cast_fp16 = transpose(perm = var_17951, x = x_349_cast_fp16)[name = string("transpose_37")]; + tensor input_435_cast_fp16 = expand_dims(axes = input_435_axes_0, x = var_17952_cast_fp16)[name = string("input_435_cast_fp16")]; + string x_351_pad_type_0 = const()[name = string("x_351_pad_type_0"), val = string("valid")]; + tensor x_351_strides_0 = const()[name = string("x_351_strides_0"), val = tensor([1, 1])]; + tensor x_351_pad_0 = const()[name = string("x_351_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_351_dilations_0 = const()[name = string("x_351_dilations_0"), val = tensor([1, 1])]; + int32 x_351_groups_0 = const()[name = string("x_351_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794929920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800901952))))[name = string("model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("x_351_cast_fp16")]; + string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; + tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; + tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; + int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(801123200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807095232))))[name = string("model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_43_cast_fp16 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("b_43_cast_fp16")]; + string var_17977_mode_0 = const()[name = string("op_17977_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_17977_cast_fp16 = gelu(mode = var_17977_mode_0, x = x_351_cast_fp16)[name = string("op_17977_cast_fp16")]; + tensor input_437_cast_fp16 = mul(x = var_17977_cast_fp16, y = b_43_cast_fp16)[name = string("input_437_cast_fp16")]; + string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; + tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; + tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; + int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; + tensor model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807316480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813288512))))[name = string("model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_43_cast_fp16 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_437_cast_fp16)[name = string("e_43_cast_fp16")]; + tensor var_17985_axes_0 = const()[name = string("op_17985_axes_0"), val = tensor([2])]; + tensor var_17985_cast_fp16 = squeeze(axes = var_17985_axes_0, x = e_43_cast_fp16)[name = string("op_17985_cast_fp16")]; + tensor var_17986 = const()[name = string("op_17986"), val = tensor([0, 2, 1])]; + int32 var_17997 = const()[name = string("op_17997"), val = int32(-1)]; + fp16 const_923_promoted_to_fp16 = const()[name = string("const_923_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_353_cast_fp16 = transpose(perm = var_17986, x = var_17985_cast_fp16)[name = string("transpose_36")]; + tensor var_17999_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = const_923_promoted_to_fp16)[name = string("op_17999_cast_fp16")]; + bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; + tensor input_439_cast_fp16 = concat(axis = var_17997, interleave = input_439_interleave_0, values = (hidden_states_353_cast_fp16, var_17999_cast_fp16))[name = string("input_439_cast_fp16")]; + tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; + fp16 var_17994_to_fp16 = const()[name = string("op_17994_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_17994_to_fp16, x = input_439_cast_fp16)[name = string("normed_525_cast_fp16")]; + tensor normed_527_begin_0 = const()[name = string("normed_527_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_527_end_0 = const()[name = string("normed_527_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_527_end_mask_0 = const()[name = string("normed_527_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_527_cast_fp16 = slice_by_index(begin = normed_527_begin_0, end = normed_527_end_0, end_mask = normed_527_end_mask_0, x = normed_525_cast_fp16)[name = string("normed_527_cast_fp16")]; + tensor var_18013_to_fp16 = const()[name = string("op_18013_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813325440)))]; + tensor hidden_states_355_cast_fp16 = mul(x = normed_527_cast_fp16, y = var_18013_to_fp16)[name = string("hidden_states_355_cast_fp16")]; + tensor hidden_states_357_cast_fp16 = add(x = hidden_states_351_cast_fp16, y = hidden_states_355_cast_fp16)[name = string("hidden_states_357_cast_fp16")]; + int32 var_18067 = const()[name = string("op_18067"), val = int32(-1)]; + fp16 const_928_promoted_to_fp16 = const()[name = string("const_928_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18069_cast_fp16 = mul(x = hidden_states_357_cast_fp16, y = const_928_promoted_to_fp16)[name = string("op_18069_cast_fp16")]; + bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; + tensor input_441_cast_fp16 = concat(axis = var_18067, interleave = input_441_interleave_0, values = (hidden_states_357_cast_fp16, var_18069_cast_fp16))[name = string("input_441_cast_fp16")]; + tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; + fp16 var_18064_to_fp16 = const()[name = string("op_18064_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_18064_to_fp16, x = input_441_cast_fp16)[name = string("normed_529_cast_fp16")]; + tensor normed_531_begin_0 = const()[name = string("normed_531_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_531_end_0 = const()[name = string("normed_531_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_531_end_mask_0 = const()[name = string("normed_531_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_531_cast_fp16 = slice_by_index(begin = normed_531_begin_0, end = normed_531_end_0, end_mask = normed_531_end_mask_0, x = normed_529_cast_fp16)[name = string("normed_531_cast_fp16")]; + tensor var_18083_to_fp16 = const()[name = string("op_18083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813327808)))]; + tensor hidden_states_359_cast_fp16 = mul(x = normed_531_cast_fp16, y = var_18083_to_fp16)[name = string("hidden_states_359_cast_fp16")]; + tensor var_18094 = const()[name = string("op_18094"), val = tensor([0, 2, 1])]; + tensor var_18097_axes_0 = const()[name = string("op_18097_axes_0"), val = tensor([2])]; + tensor var_18095_cast_fp16 = transpose(perm = var_18094, x = hidden_states_359_cast_fp16)[name = string("transpose_35")]; + tensor var_18097_cast_fp16 = expand_dims(axes = var_18097_axes_0, x = var_18095_cast_fp16)[name = string("op_18097_cast_fp16")]; + string query_states_177_pad_type_0 = const()[name = string("query_states_177_pad_type_0"), val = string("valid")]; + tensor query_states_177_strides_0 = const()[name = string("query_states_177_strides_0"), val = tensor([1, 1])]; + tensor query_states_177_pad_0 = const()[name = string("query_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_177_dilations_0 = const()[name = string("query_states_177_dilations_0"), val = tensor([1, 1])]; + int32 query_states_177_groups_0 = const()[name = string("query_states_177_groups_0"), val = int32(1)]; + tensor query_states_177 = conv(dilations = query_states_177_dilations_0, groups = query_states_177_groups_0, pad = query_states_177_pad_0, pad_type = query_states_177_pad_type_0, strides = query_states_177_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_18097_cast_fp16)[name = string("query_states_177")]; + string key_states_221_pad_type_0 = const()[name = string("key_states_221_pad_type_0"), val = string("valid")]; + tensor key_states_221_strides_0 = const()[name = string("key_states_221_strides_0"), val = tensor([1, 1])]; + tensor key_states_221_pad_0 = const()[name = string("key_states_221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_221_dilations_0 = const()[name = string("key_states_221_dilations_0"), val = tensor([1, 1])]; + int32 key_states_221_groups_0 = const()[name = string("key_states_221_groups_0"), val = int32(1)]; + tensor key_states_221 = conv(dilations = key_states_221_dilations_0, groups = key_states_221_groups_0, pad = key_states_221_pad_0, pad_type = key_states_221_pad_type_0, strides = key_states_221_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_18097_cast_fp16)[name = string("key_states_221")]; + string value_states_177_pad_type_0 = const()[name = string("value_states_177_pad_type_0"), val = string("valid")]; + tensor value_states_177_strides_0 = const()[name = string("value_states_177_strides_0"), val = tensor([1, 1])]; + tensor value_states_177_pad_0 = const()[name = string("value_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_177_dilations_0 = const()[name = string("value_states_177_dilations_0"), val = tensor([1, 1])]; + int32 value_states_177_groups_0 = const()[name = string("value_states_177_groups_0"), val = int32(1)]; + tensor value_states_177 = conv(dilations = value_states_177_dilations_0, groups = value_states_177_groups_0, pad = value_states_177_pad_0, pad_type = value_states_177_pad_type_0, strides = value_states_177_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_18097_cast_fp16)[name = string("value_states_177")]; + tensor var_18139 = const()[name = string("op_18139"), val = tensor([1, 4, 256, 64])]; + tensor var_18140 = reshape(shape = var_18139, x = query_states_177)[name = string("op_18140")]; + tensor var_18145 = const()[name = string("op_18145"), val = tensor([0, 1, 3, 2])]; + tensor var_18150 = const()[name = string("op_18150"), val = tensor([1, 1, 256, 64])]; + tensor var_18151 = reshape(shape = var_18150, x = key_states_221)[name = string("op_18151")]; + tensor var_18156 = const()[name = string("op_18156"), val = tensor([0, 1, 3, 2])]; + tensor var_18161 = const()[name = string("op_18161"), val = tensor([1, 1, 256, 64])]; + tensor var_18162 = reshape(shape = var_18161, x = value_states_177)[name = string("op_18162")]; + tensor var_18167 = const()[name = string("op_18167"), val = tensor([0, 1, 3, 2])]; + int32 var_18178 = const()[name = string("op_18178"), val = int32(-1)]; + fp16 const_933_promoted = const()[name = string("const_933_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_361 = transpose(perm = var_18145, x = var_18140)[name = string("transpose_34")]; + tensor var_18180 = mul(x = hidden_states_361, y = const_933_promoted)[name = string("op_18180")]; + bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; + tensor input_445 = concat(axis = var_18178, interleave = input_445_interleave_0, values = (hidden_states_361, var_18180))[name = string("input_445")]; + tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; + fp16 var_18175_to_fp16 = const()[name = string("op_18175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_18175_to_fp16, x = input_445)[name = string("normed_533_cast_fp16")]; + tensor normed_535_begin_0 = const()[name = string("normed_535_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_535_end_0 = const()[name = string("normed_535_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_535_end_mask_0 = const()[name = string("normed_535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_535 = slice_by_index(begin = normed_535_begin_0, end = normed_535_end_0, end_mask = normed_535_end_mask_0, x = normed_533_cast_fp16)[name = string("normed_535")]; + tensor var_18194_to_fp16 = const()[name = string("op_18194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813330176)))]; + tensor q_45_cast_fp16 = mul(x = normed_535, y = var_18194_to_fp16)[name = string("q_45_cast_fp16")]; + int32 var_18205 = const()[name = string("op_18205"), val = int32(-1)]; + fp16 const_937_promoted = const()[name = string("const_937_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_363 = transpose(perm = var_18156, x = var_18151)[name = string("transpose_33")]; + tensor var_18207 = mul(x = hidden_states_363, y = const_937_promoted)[name = string("op_18207")]; + bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; + tensor input_447 = concat(axis = var_18205, interleave = input_447_interleave_0, values = (hidden_states_363, var_18207))[name = string("input_447")]; + tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; + fp16 var_18202_to_fp16 = const()[name = string("op_18202_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_18202_to_fp16, x = input_447)[name = string("normed_537_cast_fp16")]; + tensor normed_539_begin_0 = const()[name = string("normed_539_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_539_end_0 = const()[name = string("normed_539_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_539_end_mask_0 = const()[name = string("normed_539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_539 = slice_by_index(begin = normed_539_begin_0, end = normed_539_end_0, end_mask = normed_539_end_mask_0, x = normed_537_cast_fp16)[name = string("normed_539")]; + tensor var_18221_to_fp16 = const()[name = string("op_18221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813330752)))]; + tensor k_45_cast_fp16 = mul(x = normed_539, y = var_18221_to_fp16)[name = string("k_45_cast_fp16")]; + tensor var_18235_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_5)[name = string("op_18235_cast_fp16")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; + fp16 const_943_promoted_to_fp16 = const()[name = string("const_943_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18256_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_943_promoted_to_fp16)[name = string("op_18256_cast_fp16")]; + int32 var_18258 = const()[name = string("op_18258"), val = int32(-1)]; + bool var_18259_interleave_0 = const()[name = string("op_18259_interleave_0"), val = bool(false)]; + tensor var_18259_cast_fp16 = concat(axis = var_18258, interleave = var_18259_interleave_0, values = (var_18256_cast_fp16, x1_89_cast_fp16))[name = string("op_18259_cast_fp16")]; + tensor var_18260_cast_fp16 = mul(x = var_18259_cast_fp16, y = sin_5)[name = string("op_18260_cast_fp16")]; + tensor query_states_179_cast_fp16 = add(x = var_18235_cast_fp16, y = var_18260_cast_fp16)[name = string("query_states_179_cast_fp16")]; + tensor var_18263_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_5)[name = string("op_18263_cast_fp16")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; + fp16 const_946_promoted_to_fp16 = const()[name = string("const_946_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18284_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_946_promoted_to_fp16)[name = string("op_18284_cast_fp16")]; + int32 var_18286 = const()[name = string("op_18286"), val = int32(-1)]; + bool var_18287_interleave_0 = const()[name = string("op_18287_interleave_0"), val = bool(false)]; + tensor var_18287_cast_fp16 = concat(axis = var_18286, interleave = var_18287_interleave_0, values = (var_18284_cast_fp16, x1_91_cast_fp16))[name = string("op_18287_cast_fp16")]; + tensor var_18288_cast_fp16 = mul(x = var_18287_cast_fp16, y = sin_5)[name = string("op_18288_cast_fp16")]; + tensor key_states_223_cast_fp16 = add(x = var_18263_cast_fp16, y = var_18288_cast_fp16)[name = string("key_states_223_cast_fp16")]; + tensor key_slice_39_begin_0 = const()[name = string("key_slice_39_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor key_slice_39_end_0 = const()[name = string("key_slice_39_end_0"), val = tensor([20, 1, 512, 256])]; + tensor key_slice_39_end_mask_0 = const()[name = string("key_slice_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_39_cast_fp16 = slice_by_index(begin = key_slice_39_begin_0, end = key_slice_39_end_0, end_mask = key_slice_39_end_mask_0, x = coreml_update_state_95)[name = string("key_slice_39_cast_fp16")]; + tensor var_18325_begin_0 = const()[name = string("op_18325_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_18325_end_0 = const()[name = string("op_18325_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_18325_end_mask_0 = const()[name = string("op_18325_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_18325_cast_fp16 = slice_by_index(begin = var_18325_begin_0, end = var_18325_end_0, end_mask = var_18325_end_mask_0, x = key_slice_39_cast_fp16)[name = string("op_18325_cast_fp16")]; + int32 var_18352 = const()[name = string("op_18352"), val = int32(2)]; + bool shifted_key_39_interleave_0 = const()[name = string("shifted_key_39_interleave_0"), val = bool(false)]; + tensor shifted_key_39_cast_fp16 = concat(axis = var_18352, interleave = shifted_key_39_interleave_0, values = (var_18325_cast_fp16, key_states_223_cast_fp16))[name = string("shifted_key_39_cast_fp16")]; + tensor concat_308 = const()[name = string("concat_308"), val = tensor([19, 0, 0, 0])]; + tensor concat_309 = const()[name = string("concat_309"), val = tensor([20, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_308, begin_mask = model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0, end = concat_309, end_mask = model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_39_stride_0, update = shifted_key_39_cast_fp16, x = coreml_update_state_95)[name = string("model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_200_write_state")]; + tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_200")]; + tensor value_slice_39_begin_0 = const()[name = string("value_slice_39_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor value_slice_39_end_0 = const()[name = string("value_slice_39_end_0"), val = tensor([42, 1, 512, 256])]; + tensor value_slice_39_end_mask_0 = const()[name = string("value_slice_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_39_cast_fp16 = slice_by_index(begin = value_slice_39_begin_0, end = value_slice_39_end_0, end_mask = value_slice_39_end_mask_0, x = coreml_update_state_96)[name = string("value_slice_39_cast_fp16")]; + tensor var_18395_begin_0 = const()[name = string("op_18395_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_18395_end_0 = const()[name = string("op_18395_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_18395_end_mask_0 = const()[name = string("op_18395_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_18395_cast_fp16 = slice_by_index(begin = var_18395_begin_0, end = var_18395_end_0, end_mask = var_18395_end_mask_0, x = value_slice_39_cast_fp16)[name = string("op_18395_cast_fp16")]; + int32 var_18422 = const()[name = string("op_18422"), val = int32(2)]; + bool shifted_value_39_interleave_0 = const()[name = string("shifted_value_39_interleave_0"), val = bool(false)]; + tensor value_states_179 = transpose(perm = var_18167, x = var_18162)[name = string("transpose_32")]; + tensor shifted_value_39_cast_fp16 = concat(axis = var_18422, interleave = shifted_value_39_interleave_0, values = (var_18395_cast_fp16, value_states_179))[name = string("shifted_value_39_cast_fp16")]; + tensor concat_310 = const()[name = string("concat_310"), val = tensor([41, 0, 0, 0])]; + tensor concat_311 = const()[name = string("concat_311"), val = tensor([42, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_310, begin_mask = model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0, end = concat_311, end_mask = model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_40_stride_0, update = shifted_value_39_cast_fp16, x = coreml_update_state_96)[name = string("model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_201_write_state")]; + tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_201")]; + tensor var_18450_begin_0 = const()[name = string("op_18450_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_18450_end_0 = const()[name = string("op_18450_end_0"), val = tensor([20, 1, 512, 256])]; + tensor var_18450_end_mask_0 = const()[name = string("op_18450_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_18450_cast_fp16 = slice_by_index(begin = var_18450_begin_0, end = var_18450_end_0, end_mask = var_18450_end_mask_0, x = coreml_update_state_97)[name = string("op_18450_cast_fp16")]; + tensor var_18457_begin_0 = const()[name = string("op_18457_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor var_18457_end_0 = const()[name = string("op_18457_end_0"), val = tensor([42, 1, 512, 256])]; + tensor var_18457_end_mask_0 = const()[name = string("op_18457_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_18457_cast_fp16 = slice_by_index(begin = var_18457_begin_0, end = var_18457_end_0, end_mask = var_18457_end_mask_0, x = coreml_update_state_97)[name = string("op_18457_cast_fp16")]; + tensor var_18496 = const()[name = string("op_18496"), val = tensor([1, 4, 1, 1])]; + tensor x_357_cast_fp16 = tile(reps = var_18496, x = var_18450_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_18516 = const()[name = string("op_18516"), val = tensor([1, 4, 1, 1])]; + tensor x_363_cast_fp16 = tile(reps = var_18516, x = var_18457_cast_fp16)[name = string("x_363_cast_fp16")]; + bool var_18543_transpose_x_0 = const()[name = string("op_18543_transpose_x_0"), val = bool(false)]; + bool var_18543_transpose_y_0 = const()[name = string("op_18543_transpose_y_0"), val = bool(true)]; + tensor var_18543 = matmul(transpose_x = var_18543_transpose_x_0, transpose_y = var_18543_transpose_y_0, x = query_states_179_cast_fp16, y = x_357_cast_fp16)[name = string("op_18543")]; + fp16 var_18544_to_fp16 = const()[name = string("op_18544_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_89_cast_fp16 = mul(x = var_18543, y = var_18544_to_fp16)[name = string("attn_weights_89_cast_fp16")]; + tensor attn_weights_91_cast_fp16 = add(x = attn_weights_89_cast_fp16, y = mask_slice_1)[name = string("attn_weights_91_cast_fp16")]; + int32 var_18579 = const()[name = string("op_18579"), val = int32(-1)]; + tensor var_18581_cast_fp16 = softmax(axis = var_18579, x = attn_weights_91_cast_fp16)[name = string("op_18581_cast_fp16")]; + tensor concat_316 = const()[name = string("concat_316"), val = tensor([4, 64, 512])]; + tensor reshape_66_cast_fp16 = reshape(shape = concat_316, x = var_18581_cast_fp16)[name = string("reshape_66_cast_fp16")]; + tensor concat_317 = const()[name = string("concat_317"), val = tensor([4, 512, 256])]; + tensor reshape_67_cast_fp16 = reshape(shape = concat_317, x = x_363_cast_fp16)[name = string("reshape_67_cast_fp16")]; + bool matmul_22_transpose_x_0 = const()[name = string("matmul_22_transpose_x_0"), val = bool(false)]; + bool matmul_22_transpose_y_0 = const()[name = string("matmul_22_transpose_y_0"), val = bool(false)]; + tensor matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_0, transpose_y = matmul_22_transpose_y_0, x = reshape_66_cast_fp16, y = reshape_67_cast_fp16)[name = string("matmul_22_cast_fp16")]; + tensor concat_321 = const()[name = string("concat_321"), val = tensor([1, 4, 64, 256])]; + tensor reshape_68_cast_fp16 = reshape(shape = concat_321, x = matmul_22_cast_fp16)[name = string("reshape_68_cast_fp16")]; + tensor var_18593_perm_0 = const()[name = string("op_18593_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_18612 = const()[name = string("op_18612"), val = tensor([1, 64, 1024])]; + tensor var_18593_cast_fp16 = transpose(perm = var_18593_perm_0, x = reshape_68_cast_fp16)[name = string("transpose_31")]; + tensor attn_output_225_cast_fp16 = reshape(shape = var_18612, x = var_18593_cast_fp16)[name = string("attn_output_225_cast_fp16")]; + tensor var_18617 = const()[name = string("op_18617"), val = tensor([0, 2, 1])]; + string var_18633_pad_type_0 = const()[name = string("op_18633_pad_type_0"), val = string("valid")]; + int32 var_18633_groups_0 = const()[name = string("op_18633_groups_0"), val = int32(1)]; + tensor var_18633_strides_0 = const()[name = string("op_18633_strides_0"), val = tensor([1])]; + tensor var_18633_pad_0 = const()[name = string("op_18633_pad_0"), val = tensor([0, 0])]; + tensor var_18633_dilations_0 = const()[name = string("op_18633_dilations_0"), val = tensor([1])]; + tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813331328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814216128))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_18618_cast_fp16 = transpose(perm = var_18617, x = attn_output_225_cast_fp16)[name = string("transpose_30")]; + tensor var_18633_cast_fp16 = conv(dilations = var_18633_dilations_0, groups = var_18633_groups_0, pad = var_18633_pad_0, pad_type = var_18633_pad_type_0, strides = var_18633_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_18618_cast_fp16)[name = string("op_18633_cast_fp16")]; + tensor var_18637 = const()[name = string("op_18637"), val = tensor([0, 2, 1])]; + int32 var_18648 = const()[name = string("op_18648"), val = int32(-1)]; + fp16 const_957_promoted_to_fp16 = const()[name = string("const_957_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_365_cast_fp16 = transpose(perm = var_18637, x = var_18633_cast_fp16)[name = string("transpose_29")]; + tensor var_18650_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = const_957_promoted_to_fp16)[name = string("op_18650_cast_fp16")]; + bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; + tensor input_451_cast_fp16 = concat(axis = var_18648, interleave = input_451_interleave_0, values = (hidden_states_365_cast_fp16, var_18650_cast_fp16))[name = string("input_451_cast_fp16")]; + tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; + fp16 var_18645_to_fp16 = const()[name = string("op_18645_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_18645_to_fp16, x = input_451_cast_fp16)[name = string("normed_541_cast_fp16")]; + tensor normed_543_begin_0 = const()[name = string("normed_543_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_543_end_0 = const()[name = string("normed_543_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_543_end_mask_0 = const()[name = string("normed_543_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_543_cast_fp16 = slice_by_index(begin = normed_543_begin_0, end = normed_543_end_0, end_mask = normed_543_end_mask_0, x = normed_541_cast_fp16)[name = string("normed_543_cast_fp16")]; + tensor var_18664_to_fp16 = const()[name = string("op_18664_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814253056)))]; + tensor attn_output_229_cast_fp16 = mul(x = normed_543_cast_fp16, y = var_18664_to_fp16)[name = string("attn_output_229_cast_fp16")]; + tensor hidden_states_367_cast_fp16 = add(x = hidden_states_357_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + int32 var_18677 = const()[name = string("op_18677"), val = int32(-1)]; + fp16 const_961_promoted_to_fp16 = const()[name = string("const_961_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18679_cast_fp16 = mul(x = hidden_states_367_cast_fp16, y = const_961_promoted_to_fp16)[name = string("op_18679_cast_fp16")]; + bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; + tensor input_453_cast_fp16 = concat(axis = var_18677, interleave = input_453_interleave_0, values = (hidden_states_367_cast_fp16, var_18679_cast_fp16))[name = string("input_453_cast_fp16")]; + tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; + fp16 var_18674_to_fp16 = const()[name = string("op_18674_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_18674_to_fp16, x = input_453_cast_fp16)[name = string("normed_545_cast_fp16")]; + tensor normed_547_begin_0 = const()[name = string("normed_547_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_547_end_0 = const()[name = string("normed_547_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_547_end_mask_0 = const()[name = string("normed_547_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_547_cast_fp16 = slice_by_index(begin = normed_547_begin_0, end = normed_547_end_0, end_mask = normed_547_end_mask_0, x = normed_545_cast_fp16)[name = string("normed_547_cast_fp16")]; + tensor var_18693_to_fp16 = const()[name = string("op_18693_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814255424)))]; + tensor x_365_cast_fp16 = mul(x = normed_547_cast_fp16, y = var_18693_to_fp16)[name = string("x_365_cast_fp16")]; + tensor var_18705 = const()[name = string("op_18705"), val = tensor([0, 2, 1])]; + tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; + tensor var_18706_cast_fp16 = transpose(perm = var_18705, x = x_365_cast_fp16)[name = string("transpose_28")]; + tensor input_455_cast_fp16 = expand_dims(axes = input_455_axes_0, x = var_18706_cast_fp16)[name = string("input_455_cast_fp16")]; + string x_367_pad_type_0 = const()[name = string("x_367_pad_type_0"), val = string("valid")]; + tensor x_367_strides_0 = const()[name = string("x_367_strides_0"), val = tensor([1, 1])]; + tensor x_367_pad_0 = const()[name = string("x_367_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_367_dilations_0 = const()[name = string("x_367_dilations_0"), val = tensor([1, 1])]; + int32 x_367_groups_0 = const()[name = string("x_367_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814257792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820229824))))[name = string("model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("x_367_cast_fp16")]; + string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; + tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; + tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; + int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820451072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826423104))))[name = string("model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_45_cast_fp16 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("b_45_cast_fp16")]; + string var_18731_mode_0 = const()[name = string("op_18731_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_18731_cast_fp16 = gelu(mode = var_18731_mode_0, x = x_367_cast_fp16)[name = string("op_18731_cast_fp16")]; + tensor input_457_cast_fp16 = mul(x = var_18731_cast_fp16, y = b_45_cast_fp16)[name = string("input_457_cast_fp16")]; + string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; + tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; + tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; + int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; + tensor model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826644352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832616384))))[name = string("model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_45_cast_fp16 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_457_cast_fp16)[name = string("e_45_cast_fp16")]; + tensor var_18739_axes_0 = const()[name = string("op_18739_axes_0"), val = tensor([2])]; + tensor var_18739_cast_fp16 = squeeze(axes = var_18739_axes_0, x = e_45_cast_fp16)[name = string("op_18739_cast_fp16")]; + tensor var_18740 = const()[name = string("op_18740"), val = tensor([0, 2, 1])]; + int32 var_18751 = const()[name = string("op_18751"), val = int32(-1)]; + fp16 const_965_promoted_to_fp16 = const()[name = string("const_965_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_369_cast_fp16 = transpose(perm = var_18740, x = var_18739_cast_fp16)[name = string("transpose_27")]; + tensor var_18753_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = const_965_promoted_to_fp16)[name = string("op_18753_cast_fp16")]; + bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; + tensor input_459_cast_fp16 = concat(axis = var_18751, interleave = input_459_interleave_0, values = (hidden_states_369_cast_fp16, var_18753_cast_fp16))[name = string("input_459_cast_fp16")]; + tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; + fp16 var_18748_to_fp16 = const()[name = string("op_18748_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_18748_to_fp16, x = input_459_cast_fp16)[name = string("normed_549_cast_fp16")]; + tensor normed_551_begin_0 = const()[name = string("normed_551_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_551_end_0 = const()[name = string("normed_551_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_551_end_mask_0 = const()[name = string("normed_551_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_551_cast_fp16 = slice_by_index(begin = normed_551_begin_0, end = normed_551_end_0, end_mask = normed_551_end_mask_0, x = normed_549_cast_fp16)[name = string("normed_551_cast_fp16")]; + tensor var_18767_to_fp16 = const()[name = string("op_18767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832653312)))]; + tensor hidden_states_371_cast_fp16 = mul(x = normed_551_cast_fp16, y = var_18767_to_fp16)[name = string("hidden_states_371_cast_fp16")]; + tensor hidden_states_373_cast_fp16 = add(x = hidden_states_367_cast_fp16, y = hidden_states_371_cast_fp16)[name = string("hidden_states_373_cast_fp16")]; + int32 var_18821 = const()[name = string("op_18821"), val = int32(-1)]; + fp16 const_970_promoted_to_fp16 = const()[name = string("const_970_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_18823_cast_fp16 = mul(x = hidden_states_373_cast_fp16, y = const_970_promoted_to_fp16)[name = string("op_18823_cast_fp16")]; + bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; + tensor input_461_cast_fp16 = concat(axis = var_18821, interleave = input_461_interleave_0, values = (hidden_states_373_cast_fp16, var_18823_cast_fp16))[name = string("input_461_cast_fp16")]; + tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; + fp16 var_18818_to_fp16 = const()[name = string("op_18818_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_18818_to_fp16, x = input_461_cast_fp16)[name = string("normed_553_cast_fp16")]; + tensor normed_555_begin_0 = const()[name = string("normed_555_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_555_end_0 = const()[name = string("normed_555_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_555_end_mask_0 = const()[name = string("normed_555_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_555_cast_fp16 = slice_by_index(begin = normed_555_begin_0, end = normed_555_end_0, end_mask = normed_555_end_mask_0, x = normed_553_cast_fp16)[name = string("normed_555_cast_fp16")]; + tensor var_18837_to_fp16 = const()[name = string("op_18837_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832655680)))]; + tensor hidden_states_375_cast_fp16 = mul(x = normed_555_cast_fp16, y = var_18837_to_fp16)[name = string("hidden_states_375_cast_fp16")]; + tensor var_18848 = const()[name = string("op_18848"), val = tensor([0, 2, 1])]; + tensor var_18851_axes_0 = const()[name = string("op_18851_axes_0"), val = tensor([2])]; + tensor var_18849_cast_fp16 = transpose(perm = var_18848, x = hidden_states_375_cast_fp16)[name = string("transpose_26")]; + tensor var_18851_cast_fp16 = expand_dims(axes = var_18851_axes_0, x = var_18849_cast_fp16)[name = string("op_18851_cast_fp16")]; + string query_states_185_pad_type_0 = const()[name = string("query_states_185_pad_type_0"), val = string("valid")]; + tensor query_states_185_strides_0 = const()[name = string("query_states_185_strides_0"), val = tensor([1, 1])]; + tensor query_states_185_pad_0 = const()[name = string("query_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_185_dilations_0 = const()[name = string("query_states_185_dilations_0"), val = tensor([1, 1])]; + int32 query_states_185_groups_0 = const()[name = string("query_states_185_groups_0"), val = int32(1)]; + tensor query_states_185 = conv(dilations = query_states_185_dilations_0, groups = query_states_185_groups_0, pad = query_states_185_pad_0, pad_type = query_states_185_pad_type_0, strides = query_states_185_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_18851_cast_fp16)[name = string("query_states_185")]; + string key_states_231_pad_type_0 = const()[name = string("key_states_231_pad_type_0"), val = string("valid")]; + tensor key_states_231_strides_0 = const()[name = string("key_states_231_strides_0"), val = tensor([1, 1])]; + tensor key_states_231_pad_0 = const()[name = string("key_states_231_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_231_dilations_0 = const()[name = string("key_states_231_dilations_0"), val = tensor([1, 1])]; + int32 key_states_231_groups_0 = const()[name = string("key_states_231_groups_0"), val = int32(1)]; + tensor key_states_231 = conv(dilations = key_states_231_dilations_0, groups = key_states_231_groups_0, pad = key_states_231_pad_0, pad_type = key_states_231_pad_type_0, strides = key_states_231_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_18851_cast_fp16)[name = string("key_states_231")]; + string value_states_185_pad_type_0 = const()[name = string("value_states_185_pad_type_0"), val = string("valid")]; + tensor value_states_185_strides_0 = const()[name = string("value_states_185_strides_0"), val = tensor([1, 1])]; + tensor value_states_185_pad_0 = const()[name = string("value_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_185_dilations_0 = const()[name = string("value_states_185_dilations_0"), val = tensor([1, 1])]; + int32 value_states_185_groups_0 = const()[name = string("value_states_185_groups_0"), val = int32(1)]; + tensor value_states_185 = conv(dilations = value_states_185_dilations_0, groups = value_states_185_groups_0, pad = value_states_185_pad_0, pad_type = value_states_185_pad_type_0, strides = value_states_185_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_18851_cast_fp16)[name = string("value_states_185")]; + tensor var_18893 = const()[name = string("op_18893"), val = tensor([1, 4, 256, 64])]; + tensor var_18894 = reshape(shape = var_18893, x = query_states_185)[name = string("op_18894")]; + tensor var_18899 = const()[name = string("op_18899"), val = tensor([0, 1, 3, 2])]; + tensor var_18904 = const()[name = string("op_18904"), val = tensor([1, 1, 256, 64])]; + tensor var_18905 = reshape(shape = var_18904, x = key_states_231)[name = string("op_18905")]; + tensor var_18910 = const()[name = string("op_18910"), val = tensor([0, 1, 3, 2])]; + tensor var_18915 = const()[name = string("op_18915"), val = tensor([1, 1, 256, 64])]; + tensor var_18916 = reshape(shape = var_18915, x = value_states_185)[name = string("op_18916")]; + tensor var_18921 = const()[name = string("op_18921"), val = tensor([0, 1, 3, 2])]; + int32 var_18932 = const()[name = string("op_18932"), val = int32(-1)]; + fp16 const_975_promoted = const()[name = string("const_975_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_377 = transpose(perm = var_18899, x = var_18894)[name = string("transpose_25")]; + tensor var_18934 = mul(x = hidden_states_377, y = const_975_promoted)[name = string("op_18934")]; + bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; + tensor input_465 = concat(axis = var_18932, interleave = input_465_interleave_0, values = (hidden_states_377, var_18934))[name = string("input_465")]; + tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; + fp16 var_18929_to_fp16 = const()[name = string("op_18929_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_18929_to_fp16, x = input_465)[name = string("normed_557_cast_fp16")]; + tensor normed_559_begin_0 = const()[name = string("normed_559_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_559_end_0 = const()[name = string("normed_559_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_559_end_mask_0 = const()[name = string("normed_559_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_559 = slice_by_index(begin = normed_559_begin_0, end = normed_559_end_0, end_mask = normed_559_end_mask_0, x = normed_557_cast_fp16)[name = string("normed_559")]; + tensor var_18948_to_fp16 = const()[name = string("op_18948_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832658048)))]; + tensor q_47_cast_fp16 = mul(x = normed_559, y = var_18948_to_fp16)[name = string("q_47_cast_fp16")]; + int32 var_18959 = const()[name = string("op_18959"), val = int32(-1)]; + fp16 const_979_promoted = const()[name = string("const_979_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_379 = transpose(perm = var_18910, x = var_18905)[name = string("transpose_24")]; + tensor var_18961 = mul(x = hidden_states_379, y = const_979_promoted)[name = string("op_18961")]; + bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; + tensor input_467 = concat(axis = var_18959, interleave = input_467_interleave_0, values = (hidden_states_379, var_18961))[name = string("input_467")]; + tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; + fp16 var_18956_to_fp16 = const()[name = string("op_18956_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_18956_to_fp16, x = input_467)[name = string("normed_561_cast_fp16")]; + tensor normed_563_begin_0 = const()[name = string("normed_563_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_563_end_0 = const()[name = string("normed_563_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_563_end_mask_0 = const()[name = string("normed_563_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_563 = slice_by_index(begin = normed_563_begin_0, end = normed_563_end_0, end_mask = normed_563_end_mask_0, x = normed_561_cast_fp16)[name = string("normed_563")]; + tensor var_18975_to_fp16 = const()[name = string("op_18975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832658624)))]; + tensor k_47_cast_fp16 = mul(x = normed_563, y = var_18975_to_fp16)[name = string("k_47_cast_fp16")]; + tensor var_18989_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_35)[name = string("op_18989_cast_fp16")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; + fp16 const_985_promoted_to_fp16 = const()[name = string("const_985_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19010_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_985_promoted_to_fp16)[name = string("op_19010_cast_fp16")]; + int32 var_19012 = const()[name = string("op_19012"), val = int32(-1)]; + bool var_19013_interleave_0 = const()[name = string("op_19013_interleave_0"), val = bool(false)]; + tensor var_19013_cast_fp16 = concat(axis = var_19012, interleave = var_19013_interleave_0, values = (var_19010_cast_fp16, x1_93_cast_fp16))[name = string("op_19013_cast_fp16")]; + tensor var_19014_cast_fp16 = mul(x = var_19013_cast_fp16, y = sin_35)[name = string("op_19014_cast_fp16")]; + tensor query_states_187_cast_fp16 = add(x = var_18989_cast_fp16, y = var_19014_cast_fp16)[name = string("query_states_187_cast_fp16")]; + tensor var_19017_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_35)[name = string("op_19017_cast_fp16")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; + fp16 const_988_promoted_to_fp16 = const()[name = string("const_988_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19038_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_988_promoted_to_fp16)[name = string("op_19038_cast_fp16")]; + int32 var_19040 = const()[name = string("op_19040"), val = int32(-1)]; + bool var_19041_interleave_0 = const()[name = string("op_19041_interleave_0"), val = bool(false)]; + tensor var_19041_cast_fp16 = concat(axis = var_19040, interleave = var_19041_interleave_0, values = (var_19038_cast_fp16, x1_95_cast_fp16))[name = string("op_19041_cast_fp16")]; + tensor var_19042_cast_fp16 = mul(x = var_19041_cast_fp16, y = sin_35)[name = string("op_19042_cast_fp16")]; + tensor key_states_233_cast_fp16 = add(x = var_19017_cast_fp16, y = var_19042_cast_fp16)[name = string("key_states_233_cast_fp16")]; + tensor concat_322 = const()[name = string("concat_322"), val = tensor([3, 0, 0, 0])]; + tensor concat_323 = const()[name = string("concat_323"), val = tensor([4, 0, 64, 0])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_322, begin_mask = model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0, end = concat_323, end_mask = model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_7_stride_0, update = key_states_233_cast_fp16, x = coreml_update_state_87)[name = string("model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_202_write_state")]; + tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_202")]; + tensor concat_324 = const()[name = string("concat_324"), val = tensor([7, 0, 0, 0])]; + tensor concat_325 = const()[name = string("concat_325"), val = tensor([8, 0, 64, 0])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_187 = transpose(perm = var_18921, x = var_18916)[name = string("transpose_23")]; + tensor model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_324, begin_mask = model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0, end = concat_325, end_mask = model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_8_stride_0, update = value_states_187, x = coreml_update_state_98)[name = string("model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_203_write_state")]; + tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_203")]; + tensor var_19141_begin_0 = const()[name = string("op_19141_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_19141_end_0 = const()[name = string("op_19141_end_0"), val = tensor([4, 1, 4096, 256])]; + tensor var_19141_end_mask_0 = const()[name = string("op_19141_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_19141_cast_fp16 = slice_by_index(begin = var_19141_begin_0, end = var_19141_end_0, end_mask = var_19141_end_mask_0, x = coreml_update_state_99)[name = string("op_19141_cast_fp16")]; + tensor var_19148_begin_0 = const()[name = string("op_19148_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_19148_end_0 = const()[name = string("op_19148_end_0"), val = tensor([1, 1, 4096, 256])]; + tensor var_19148_end_mask_0 = const()[name = string("op_19148_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_19148_cast_fp16 = slice_by_index(begin = var_19148_begin_0, end = var_19148_end_0, end_mask = var_19148_end_mask_0, x = coreml_update_state_99)[name = string("op_19148_cast_fp16")]; + tensor var_19187 = const()[name = string("op_19187"), val = tensor([1, 4, 1, 1])]; + tensor x_373_cast_fp16 = tile(reps = var_19187, x = var_19141_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_19207 = const()[name = string("op_19207"), val = tensor([1, 4, 1, 1])]; + tensor x_379_cast_fp16 = tile(reps = var_19207, x = var_19148_cast_fp16)[name = string("x_379_cast_fp16")]; + bool var_19234_transpose_x_0 = const()[name = string("op_19234_transpose_x_0"), val = bool(false)]; + bool var_19234_transpose_y_0 = const()[name = string("op_19234_transpose_y_0"), val = bool(true)]; + tensor var_19234 = matmul(transpose_x = var_19234_transpose_x_0, transpose_y = var_19234_transpose_y_0, x = query_states_187_cast_fp16, y = x_373_cast_fp16)[name = string("op_19234")]; + fp16 var_19235_to_fp16 = const()[name = string("op_19235_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_93_cast_fp16 = mul(x = var_19234, y = var_19235_to_fp16)[name = string("attn_weights_93_cast_fp16")]; + tensor attn_weights_95_cast_fp16 = add(x = attn_weights_93_cast_fp16, y = causal_mask)[name = string("attn_weights_95_cast_fp16")]; + int32 var_19270 = const()[name = string("op_19270"), val = int32(-1)]; + tensor var_19272_cast_fp16 = softmax(axis = var_19270, x = attn_weights_95_cast_fp16)[name = string("op_19272_cast_fp16")]; + tensor concat_330 = const()[name = string("concat_330"), val = tensor([4, 64, 4096])]; + tensor reshape_69_cast_fp16 = reshape(shape = concat_330, x = var_19272_cast_fp16)[name = string("reshape_69_cast_fp16")]; + tensor concat_331 = const()[name = string("concat_331"), val = tensor([4, 4096, 256])]; + tensor reshape_70_cast_fp16 = reshape(shape = concat_331, x = x_379_cast_fp16)[name = string("reshape_70_cast_fp16")]; + bool matmul_23_transpose_x_0 = const()[name = string("matmul_23_transpose_x_0"), val = bool(false)]; + bool matmul_23_transpose_y_0 = const()[name = string("matmul_23_transpose_y_0"), val = bool(false)]; + tensor matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_0, transpose_y = matmul_23_transpose_y_0, x = reshape_69_cast_fp16, y = reshape_70_cast_fp16)[name = string("matmul_23_cast_fp16")]; + tensor concat_335 = const()[name = string("concat_335"), val = tensor([1, 4, 64, 256])]; + tensor reshape_71_cast_fp16 = reshape(shape = concat_335, x = matmul_23_cast_fp16)[name = string("reshape_71_cast_fp16")]; + tensor var_19284_perm_0 = const()[name = string("op_19284_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_19303 = const()[name = string("op_19303"), val = tensor([1, 64, 1024])]; + tensor var_19284_cast_fp16 = transpose(perm = var_19284_perm_0, x = reshape_71_cast_fp16)[name = string("transpose_22")]; + tensor attn_output_235_cast_fp16 = reshape(shape = var_19303, x = var_19284_cast_fp16)[name = string("attn_output_235_cast_fp16")]; + tensor var_19308 = const()[name = string("op_19308"), val = tensor([0, 2, 1])]; + string var_19324_pad_type_0 = const()[name = string("op_19324_pad_type_0"), val = string("valid")]; + int32 var_19324_groups_0 = const()[name = string("op_19324_groups_0"), val = int32(1)]; + tensor var_19324_strides_0 = const()[name = string("op_19324_strides_0"), val = tensor([1])]; + tensor var_19324_pad_0 = const()[name = string("op_19324_pad_0"), val = tensor([0, 0])]; + tensor var_19324_dilations_0 = const()[name = string("op_19324_dilations_0"), val = tensor([1])]; + tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832659200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833544000))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_19309_cast_fp16 = transpose(perm = var_19308, x = attn_output_235_cast_fp16)[name = string("transpose_21")]; + tensor var_19324_cast_fp16 = conv(dilations = var_19324_dilations_0, groups = var_19324_groups_0, pad = var_19324_pad_0, pad_type = var_19324_pad_type_0, strides = var_19324_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_19309_cast_fp16)[name = string("op_19324_cast_fp16")]; + tensor var_19328 = const()[name = string("op_19328"), val = tensor([0, 2, 1])]; + int32 var_19339 = const()[name = string("op_19339"), val = int32(-1)]; + fp16 const_1000_promoted_to_fp16 = const()[name = string("const_1000_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_381_cast_fp16 = transpose(perm = var_19328, x = var_19324_cast_fp16)[name = string("transpose_20")]; + tensor var_19341_cast_fp16 = mul(x = hidden_states_381_cast_fp16, y = const_1000_promoted_to_fp16)[name = string("op_19341_cast_fp16")]; + bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; + tensor input_471_cast_fp16 = concat(axis = var_19339, interleave = input_471_interleave_0, values = (hidden_states_381_cast_fp16, var_19341_cast_fp16))[name = string("input_471_cast_fp16")]; + tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; + fp16 var_19336_to_fp16 = const()[name = string("op_19336_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_19336_to_fp16, x = input_471_cast_fp16)[name = string("normed_565_cast_fp16")]; + tensor normed_567_begin_0 = const()[name = string("normed_567_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_567_end_0 = const()[name = string("normed_567_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_567_end_mask_0 = const()[name = string("normed_567_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_567_cast_fp16 = slice_by_index(begin = normed_567_begin_0, end = normed_567_end_0, end_mask = normed_567_end_mask_0, x = normed_565_cast_fp16)[name = string("normed_567_cast_fp16")]; + tensor var_19355_to_fp16 = const()[name = string("op_19355_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833580928)))]; + tensor attn_output_239_cast_fp16 = mul(x = normed_567_cast_fp16, y = var_19355_to_fp16)[name = string("attn_output_239_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = add(x = hidden_states_373_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + int32 var_19368 = const()[name = string("op_19368"), val = int32(-1)]; + fp16 const_1004_promoted_to_fp16 = const()[name = string("const_1004_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19370_cast_fp16 = mul(x = hidden_states_383_cast_fp16, y = const_1004_promoted_to_fp16)[name = string("op_19370_cast_fp16")]; + bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; + tensor input_473_cast_fp16 = concat(axis = var_19368, interleave = input_473_interleave_0, values = (hidden_states_383_cast_fp16, var_19370_cast_fp16))[name = string("input_473_cast_fp16")]; + tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; + fp16 var_19365_to_fp16 = const()[name = string("op_19365_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_19365_to_fp16, x = input_473_cast_fp16)[name = string("normed_569_cast_fp16")]; + tensor normed_571_begin_0 = const()[name = string("normed_571_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_571_end_0 = const()[name = string("normed_571_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_571_end_mask_0 = const()[name = string("normed_571_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_571_cast_fp16 = slice_by_index(begin = normed_571_begin_0, end = normed_571_end_0, end_mask = normed_571_end_mask_0, x = normed_569_cast_fp16)[name = string("normed_571_cast_fp16")]; + tensor var_19384_to_fp16 = const()[name = string("op_19384_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833583296)))]; + tensor x_381_cast_fp16 = mul(x = normed_571_cast_fp16, y = var_19384_to_fp16)[name = string("x_381_cast_fp16")]; + tensor var_19396 = const()[name = string("op_19396"), val = tensor([0, 2, 1])]; + tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; + tensor var_19397_cast_fp16 = transpose(perm = var_19396, x = x_381_cast_fp16)[name = string("transpose_19")]; + tensor input_475_cast_fp16 = expand_dims(axes = input_475_axes_0, x = var_19397_cast_fp16)[name = string("input_475_cast_fp16")]; + string x_383_pad_type_0 = const()[name = string("x_383_pad_type_0"), val = string("valid")]; + tensor x_383_strides_0 = const()[name = string("x_383_strides_0"), val = tensor([1, 1])]; + tensor x_383_pad_0 = const()[name = string("x_383_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_383_dilations_0 = const()[name = string("x_383_dilations_0"), val = tensor([1, 1])]; + int32 x_383_groups_0 = const()[name = string("x_383_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833585664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839557696))))[name = string("model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_383_cast_fp16 = conv(dilations = x_383_dilations_0, groups = x_383_groups_0, pad = x_383_pad_0, pad_type = x_383_pad_type_0, strides = x_383_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("x_383_cast_fp16")]; + string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; + tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; + tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; + int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839778944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845750976))))[name = string("model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_47_cast_fp16 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("b_47_cast_fp16")]; + string var_19422_mode_0 = const()[name = string("op_19422_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_19422_cast_fp16 = gelu(mode = var_19422_mode_0, x = x_383_cast_fp16)[name = string("op_19422_cast_fp16")]; + tensor input_477_cast_fp16 = mul(x = var_19422_cast_fp16, y = b_47_cast_fp16)[name = string("input_477_cast_fp16")]; + string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; + tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; + tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; + int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; + tensor model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845972224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851944256))))[name = string("model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_47_cast_fp16 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_477_cast_fp16)[name = string("e_47_cast_fp16")]; + tensor var_19430_axes_0 = const()[name = string("op_19430_axes_0"), val = tensor([2])]; + tensor var_19430_cast_fp16 = squeeze(axes = var_19430_axes_0, x = e_47_cast_fp16)[name = string("op_19430_cast_fp16")]; + tensor var_19431 = const()[name = string("op_19431"), val = tensor([0, 2, 1])]; + int32 var_19442 = const()[name = string("op_19442"), val = int32(-1)]; + fp16 const_1008_promoted_to_fp16 = const()[name = string("const_1008_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_385_cast_fp16 = transpose(perm = var_19431, x = var_19430_cast_fp16)[name = string("transpose_18")]; + tensor var_19444_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = const_1008_promoted_to_fp16)[name = string("op_19444_cast_fp16")]; + bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; + tensor input_479_cast_fp16 = concat(axis = var_19442, interleave = input_479_interleave_0, values = (hidden_states_385_cast_fp16, var_19444_cast_fp16))[name = string("input_479_cast_fp16")]; + tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; + fp16 var_19439_to_fp16 = const()[name = string("op_19439_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_19439_to_fp16, x = input_479_cast_fp16)[name = string("normed_573_cast_fp16")]; + tensor normed_575_begin_0 = const()[name = string("normed_575_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_575_end_0 = const()[name = string("normed_575_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_575_end_mask_0 = const()[name = string("normed_575_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_575_cast_fp16 = slice_by_index(begin = normed_575_begin_0, end = normed_575_end_0, end_mask = normed_575_end_mask_0, x = normed_573_cast_fp16)[name = string("normed_575_cast_fp16")]; + tensor var_19458_to_fp16 = const()[name = string("op_19458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851981184)))]; + tensor hidden_states_387_cast_fp16 = mul(x = normed_575_cast_fp16, y = var_19458_to_fp16)[name = string("hidden_states_387_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_383_cast_fp16, y = hidden_states_387_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + int32 var_19512 = const()[name = string("op_19512"), val = int32(-1)]; + fp16 const_1013_promoted_to_fp16 = const()[name = string("const_1013_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19514_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = const_1013_promoted_to_fp16)[name = string("op_19514_cast_fp16")]; + bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; + tensor input_481_cast_fp16 = concat(axis = var_19512, interleave = input_481_interleave_0, values = (hidden_states_389_cast_fp16, var_19514_cast_fp16))[name = string("input_481_cast_fp16")]; + tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; + fp16 var_19509_to_fp16 = const()[name = string("op_19509_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_19509_to_fp16, x = input_481_cast_fp16)[name = string("normed_577_cast_fp16")]; + tensor normed_579_begin_0 = const()[name = string("normed_579_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_579_end_0 = const()[name = string("normed_579_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_579_end_mask_0 = const()[name = string("normed_579_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_579_cast_fp16 = slice_by_index(begin = normed_579_begin_0, end = normed_579_end_0, end_mask = normed_579_end_mask_0, x = normed_577_cast_fp16)[name = string("normed_579_cast_fp16")]; + tensor var_19528_to_fp16 = const()[name = string("op_19528_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851983552)))]; + tensor hidden_states_391_cast_fp16 = mul(x = normed_579_cast_fp16, y = var_19528_to_fp16)[name = string("hidden_states_391_cast_fp16")]; + tensor var_19539 = const()[name = string("op_19539"), val = tensor([0, 2, 1])]; + tensor var_19542_axes_0 = const()[name = string("op_19542_axes_0"), val = tensor([2])]; + tensor var_19540_cast_fp16 = transpose(perm = var_19539, x = hidden_states_391_cast_fp16)[name = string("transpose_17")]; + tensor var_19542_cast_fp16 = expand_dims(axes = var_19542_axes_0, x = var_19540_cast_fp16)[name = string("op_19542_cast_fp16")]; + string query_states_193_pad_type_0 = const()[name = string("query_states_193_pad_type_0"), val = string("valid")]; + tensor query_states_193_strides_0 = const()[name = string("query_states_193_strides_0"), val = tensor([1, 1])]; + tensor query_states_193_pad_0 = const()[name = string("query_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_193_dilations_0 = const()[name = string("query_states_193_dilations_0"), val = tensor([1, 1])]; + int32 query_states_193_groups_0 = const()[name = string("query_states_193_groups_0"), val = int32(1)]; + tensor query_states_193 = conv(dilations = query_states_193_dilations_0, groups = query_states_193_groups_0, pad = query_states_193_pad_0, pad_type = query_states_193_pad_type_0, strides = query_states_193_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_19542_cast_fp16)[name = string("query_states_193")]; + string key_states_241_pad_type_0 = const()[name = string("key_states_241_pad_type_0"), val = string("valid")]; + tensor key_states_241_strides_0 = const()[name = string("key_states_241_strides_0"), val = tensor([1, 1])]; + tensor key_states_241_pad_0 = const()[name = string("key_states_241_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_241_dilations_0 = const()[name = string("key_states_241_dilations_0"), val = tensor([1, 1])]; + int32 key_states_241_groups_0 = const()[name = string("key_states_241_groups_0"), val = int32(1)]; + tensor key_states_241 = conv(dilations = key_states_241_dilations_0, groups = key_states_241_groups_0, pad = key_states_241_pad_0, pad_type = key_states_241_pad_type_0, strides = key_states_241_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_19542_cast_fp16)[name = string("key_states_241")]; + string value_states_193_pad_type_0 = const()[name = string("value_states_193_pad_type_0"), val = string("valid")]; + tensor value_states_193_strides_0 = const()[name = string("value_states_193_strides_0"), val = tensor([1, 1])]; + tensor value_states_193_pad_0 = const()[name = string("value_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_193_dilations_0 = const()[name = string("value_states_193_dilations_0"), val = tensor([1, 1])]; + int32 value_states_193_groups_0 = const()[name = string("value_states_193_groups_0"), val = int32(1)]; + tensor value_states_193 = conv(dilations = value_states_193_dilations_0, groups = value_states_193_groups_0, pad = value_states_193_pad_0, pad_type = value_states_193_pad_type_0, strides = value_states_193_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_19542_cast_fp16)[name = string("value_states_193")]; + tensor var_19584 = const()[name = string("op_19584"), val = tensor([1, 4, 256, 64])]; + tensor var_19585 = reshape(shape = var_19584, x = query_states_193)[name = string("op_19585")]; + tensor var_19590 = const()[name = string("op_19590"), val = tensor([0, 1, 3, 2])]; + tensor var_19595 = const()[name = string("op_19595"), val = tensor([1, 1, 256, 64])]; + tensor var_19596 = reshape(shape = var_19595, x = key_states_241)[name = string("op_19596")]; + tensor var_19601 = const()[name = string("op_19601"), val = tensor([0, 1, 3, 2])]; + tensor var_19606 = const()[name = string("op_19606"), val = tensor([1, 1, 256, 64])]; + tensor var_19607 = reshape(shape = var_19606, x = value_states_193)[name = string("op_19607")]; + tensor var_19612 = const()[name = string("op_19612"), val = tensor([0, 1, 3, 2])]; + int32 var_19623 = const()[name = string("op_19623"), val = int32(-1)]; + fp16 const_1018_promoted = const()[name = string("const_1018_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_393 = transpose(perm = var_19590, x = var_19585)[name = string("transpose_16")]; + tensor var_19625 = mul(x = hidden_states_393, y = const_1018_promoted)[name = string("op_19625")]; + bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; + tensor input_485 = concat(axis = var_19623, interleave = input_485_interleave_0, values = (hidden_states_393, var_19625))[name = string("input_485")]; + tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; + fp16 var_19620_to_fp16 = const()[name = string("op_19620_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_19620_to_fp16, x = input_485)[name = string("normed_581_cast_fp16")]; + tensor normed_583_begin_0 = const()[name = string("normed_583_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_583_end_0 = const()[name = string("normed_583_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_583_end_mask_0 = const()[name = string("normed_583_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_583 = slice_by_index(begin = normed_583_begin_0, end = normed_583_end_0, end_mask = normed_583_end_mask_0, x = normed_581_cast_fp16)[name = string("normed_583")]; + tensor var_19639_to_fp16 = const()[name = string("op_19639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851985920)))]; + tensor q_49_cast_fp16 = mul(x = normed_583, y = var_19639_to_fp16)[name = string("q_49_cast_fp16")]; + int32 var_19650 = const()[name = string("op_19650"), val = int32(-1)]; + fp16 const_1022_promoted = const()[name = string("const_1022_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_395 = transpose(perm = var_19601, x = var_19596)[name = string("transpose_15")]; + tensor var_19652 = mul(x = hidden_states_395, y = const_1022_promoted)[name = string("op_19652")]; + bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; + tensor input_487 = concat(axis = var_19650, interleave = input_487_interleave_0, values = (hidden_states_395, var_19652))[name = string("input_487")]; + tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; + fp16 var_19647_to_fp16 = const()[name = string("op_19647_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_19647_to_fp16, x = input_487)[name = string("normed_585_cast_fp16")]; + tensor normed_587_begin_0 = const()[name = string("normed_587_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_587_end_0 = const()[name = string("normed_587_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_587_end_mask_0 = const()[name = string("normed_587_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_587 = slice_by_index(begin = normed_587_begin_0, end = normed_587_end_0, end_mask = normed_587_end_mask_0, x = normed_585_cast_fp16)[name = string("normed_587")]; + tensor var_19666_to_fp16 = const()[name = string("op_19666_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851986496)))]; + tensor k_49_cast_fp16 = mul(x = normed_587, y = var_19666_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_19680_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_5)[name = string("op_19680_cast_fp16")]; + tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; + fp16 const_1028_promoted_to_fp16 = const()[name = string("const_1028_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19701_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_1028_promoted_to_fp16)[name = string("op_19701_cast_fp16")]; + int32 var_19703 = const()[name = string("op_19703"), val = int32(-1)]; + bool var_19704_interleave_0 = const()[name = string("op_19704_interleave_0"), val = bool(false)]; + tensor var_19704_cast_fp16 = concat(axis = var_19703, interleave = var_19704_interleave_0, values = (var_19701_cast_fp16, x1_97_cast_fp16))[name = string("op_19704_cast_fp16")]; + tensor var_19705_cast_fp16 = mul(x = var_19704_cast_fp16, y = sin_5)[name = string("op_19705_cast_fp16")]; + tensor query_states_195_cast_fp16 = add(x = var_19680_cast_fp16, y = var_19705_cast_fp16)[name = string("query_states_195_cast_fp16")]; + tensor var_19708_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_5)[name = string("op_19708_cast_fp16")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; + fp16 const_1031_promoted_to_fp16 = const()[name = string("const_1031_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_19729_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_1031_promoted_to_fp16)[name = string("op_19729_cast_fp16")]; + int32 var_19731 = const()[name = string("op_19731"), val = int32(-1)]; + bool var_19732_interleave_0 = const()[name = string("op_19732_interleave_0"), val = bool(false)]; + tensor var_19732_cast_fp16 = concat(axis = var_19731, interleave = var_19732_interleave_0, values = (var_19729_cast_fp16, x1_99_cast_fp16))[name = string("op_19732_cast_fp16")]; + tensor var_19733_cast_fp16 = mul(x = var_19732_cast_fp16, y = sin_5)[name = string("op_19733_cast_fp16")]; + tensor key_states_243_cast_fp16 = add(x = var_19708_cast_fp16, y = var_19733_cast_fp16)[name = string("key_states_243_cast_fp16")]; + tensor key_slice_41_begin_0 = const()[name = string("key_slice_41_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor key_slice_41_end_0 = const()[name = string("key_slice_41_end_0"), val = tensor([21, 1, 512, 256])]; + tensor key_slice_41_end_mask_0 = const()[name = string("key_slice_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_41_cast_fp16 = slice_by_index(begin = key_slice_41_begin_0, end = key_slice_41_end_0, end_mask = key_slice_41_end_mask_0, x = coreml_update_state_97)[name = string("key_slice_41_cast_fp16")]; + tensor var_19770_begin_0 = const()[name = string("op_19770_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_19770_end_0 = const()[name = string("op_19770_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_19770_end_mask_0 = const()[name = string("op_19770_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_19770_cast_fp16 = slice_by_index(begin = var_19770_begin_0, end = var_19770_end_0, end_mask = var_19770_end_mask_0, x = key_slice_41_cast_fp16)[name = string("op_19770_cast_fp16")]; + int32 var_19797 = const()[name = string("op_19797"), val = int32(2)]; + bool shifted_key_41_interleave_0 = const()[name = string("shifted_key_41_interleave_0"), val = bool(false)]; + tensor shifted_key_41_cast_fp16 = concat(axis = var_19797, interleave = shifted_key_41_interleave_0, values = (var_19770_cast_fp16, key_states_243_cast_fp16))[name = string("shifted_key_41_cast_fp16")]; + tensor concat_336 = const()[name = string("concat_336"), val = tensor([20, 0, 0, 0])]; + tensor concat_337 = const()[name = string("concat_337"), val = tensor([21, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_336, begin_mask = model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0, end = concat_337, end_mask = model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_41_stride_0, update = shifted_key_41_cast_fp16, x = coreml_update_state_97)[name = string("model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_204_write_state")]; + tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_204")]; + tensor value_slice_41_begin_0 = const()[name = string("value_slice_41_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor value_slice_41_end_0 = const()[name = string("value_slice_41_end_0"), val = tensor([43, 1, 512, 256])]; + tensor value_slice_41_end_mask_0 = const()[name = string("value_slice_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor value_slice_41_cast_fp16 = slice_by_index(begin = value_slice_41_begin_0, end = value_slice_41_end_0, end_mask = value_slice_41_end_mask_0, x = coreml_update_state_100)[name = string("value_slice_41_cast_fp16")]; + tensor var_19840_begin_0 = const()[name = string("op_19840_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_19840_end_0 = const()[name = string("op_19840_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_19840_end_mask_0 = const()[name = string("op_19840_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_19840_cast_fp16 = slice_by_index(begin = var_19840_begin_0, end = var_19840_end_0, end_mask = var_19840_end_mask_0, x = value_slice_41_cast_fp16)[name = string("op_19840_cast_fp16")]; + int32 var_19867 = const()[name = string("op_19867"), val = int32(2)]; + bool shifted_value_41_interleave_0 = const()[name = string("shifted_value_41_interleave_0"), val = bool(false)]; + tensor value_states_195 = transpose(perm = var_19612, x = var_19607)[name = string("transpose_14")]; + tensor shifted_value_41_cast_fp16 = concat(axis = var_19867, interleave = shifted_value_41_interleave_0, values = (var_19840_cast_fp16, value_states_195))[name = string("shifted_value_41_cast_fp16")]; + tensor concat_338 = const()[name = string("concat_338"), val = tensor([42, 0, 0, 0])]; + tensor concat_339 = const()[name = string("concat_339"), val = tensor([43, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_338, begin_mask = model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0, end = concat_339, end_mask = model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_42_stride_0, update = shifted_value_41_cast_fp16, x = coreml_update_state_100)[name = string("model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_205_write_state")]; + tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_205")]; + tensor var_19895_begin_0 = const()[name = string("op_19895_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_19895_end_0 = const()[name = string("op_19895_end_0"), val = tensor([21, 1, 512, 256])]; + tensor var_19895_end_mask_0 = const()[name = string("op_19895_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_19895_cast_fp16 = slice_by_index(begin = var_19895_begin_0, end = var_19895_end_0, end_mask = var_19895_end_mask_0, x = coreml_update_state_101)[name = string("op_19895_cast_fp16")]; + tensor var_19902_begin_0 = const()[name = string("op_19902_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor var_19902_end_0 = const()[name = string("op_19902_end_0"), val = tensor([43, 1, 512, 256])]; + tensor var_19902_end_mask_0 = const()[name = string("op_19902_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_19902_cast_fp16 = slice_by_index(begin = var_19902_begin_0, end = var_19902_end_0, end_mask = var_19902_end_mask_0, x = coreml_update_state_101)[name = string("op_19902_cast_fp16")]; + tensor var_19941 = const()[name = string("op_19941"), val = tensor([1, 4, 1, 1])]; + tensor x_389_cast_fp16 = tile(reps = var_19941, x = var_19895_cast_fp16)[name = string("x_389_cast_fp16")]; + tensor var_19961 = const()[name = string("op_19961"), val = tensor([1, 4, 1, 1])]; + tensor x_395_cast_fp16 = tile(reps = var_19961, x = var_19902_cast_fp16)[name = string("x_395_cast_fp16")]; + bool var_19988_transpose_x_0 = const()[name = string("op_19988_transpose_x_0"), val = bool(false)]; + bool var_19988_transpose_y_0 = const()[name = string("op_19988_transpose_y_0"), val = bool(true)]; + tensor var_19988 = matmul(transpose_x = var_19988_transpose_x_0, transpose_y = var_19988_transpose_y_0, x = query_states_195_cast_fp16, y = x_389_cast_fp16)[name = string("op_19988")]; + fp16 var_19989_to_fp16 = const()[name = string("op_19989_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_97_cast_fp16 = mul(x = var_19988, y = var_19989_to_fp16)[name = string("attn_weights_97_cast_fp16")]; + tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = mask_slice_1)[name = string("attn_weights_99_cast_fp16")]; + int32 var_20024 = const()[name = string("op_20024"), val = int32(-1)]; + tensor var_20026_cast_fp16 = softmax(axis = var_20024, x = attn_weights_99_cast_fp16)[name = string("op_20026_cast_fp16")]; + tensor concat_344 = const()[name = string("concat_344"), val = tensor([4, 64, 512])]; + tensor reshape_72_cast_fp16 = reshape(shape = concat_344, x = var_20026_cast_fp16)[name = string("reshape_72_cast_fp16")]; + tensor concat_345 = const()[name = string("concat_345"), val = tensor([4, 512, 256])]; + tensor reshape_73_cast_fp16 = reshape(shape = concat_345, x = x_395_cast_fp16)[name = string("reshape_73_cast_fp16")]; + bool matmul_24_transpose_x_0 = const()[name = string("matmul_24_transpose_x_0"), val = bool(false)]; + bool matmul_24_transpose_y_0 = const()[name = string("matmul_24_transpose_y_0"), val = bool(false)]; + tensor matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_0, transpose_y = matmul_24_transpose_y_0, x = reshape_72_cast_fp16, y = reshape_73_cast_fp16)[name = string("matmul_24_cast_fp16")]; + tensor concat_349 = const()[name = string("concat_349"), val = tensor([1, 4, 64, 256])]; + tensor reshape_74_cast_fp16 = reshape(shape = concat_349, x = matmul_24_cast_fp16)[name = string("reshape_74_cast_fp16")]; + tensor var_20038_perm_0 = const()[name = string("op_20038_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_20057 = const()[name = string("op_20057"), val = tensor([1, 64, 1024])]; + tensor var_20038_cast_fp16 = transpose(perm = var_20038_perm_0, x = reshape_74_cast_fp16)[name = string("transpose_13")]; + tensor attn_output_245_cast_fp16 = reshape(shape = var_20057, x = var_20038_cast_fp16)[name = string("attn_output_245_cast_fp16")]; + tensor var_20062 = const()[name = string("op_20062"), val = tensor([0, 2, 1])]; + string var_20078_pad_type_0 = const()[name = string("op_20078_pad_type_0"), val = string("valid")]; + int32 var_20078_groups_0 = const()[name = string("op_20078_groups_0"), val = int32(1)]; + tensor var_20078_strides_0 = const()[name = string("op_20078_strides_0"), val = tensor([1])]; + tensor var_20078_pad_0 = const()[name = string("op_20078_pad_0"), val = tensor([0, 0])]; + tensor var_20078_dilations_0 = const()[name = string("op_20078_dilations_0"), val = tensor([1])]; + tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851987072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852871872))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_20063_cast_fp16 = transpose(perm = var_20062, x = attn_output_245_cast_fp16)[name = string("transpose_12")]; + tensor var_20078_cast_fp16 = conv(dilations = var_20078_dilations_0, groups = var_20078_groups_0, pad = var_20078_pad_0, pad_type = var_20078_pad_type_0, strides = var_20078_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_20063_cast_fp16)[name = string("op_20078_cast_fp16")]; + tensor var_20082 = const()[name = string("op_20082"), val = tensor([0, 2, 1])]; + int32 var_20093 = const()[name = string("op_20093"), val = int32(-1)]; + fp16 const_1042_promoted_to_fp16 = const()[name = string("const_1042_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_397_cast_fp16 = transpose(perm = var_20082, x = var_20078_cast_fp16)[name = string("transpose_11")]; + tensor var_20095_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = const_1042_promoted_to_fp16)[name = string("op_20095_cast_fp16")]; + bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; + tensor input_491_cast_fp16 = concat(axis = var_20093, interleave = input_491_interleave_0, values = (hidden_states_397_cast_fp16, var_20095_cast_fp16))[name = string("input_491_cast_fp16")]; + tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; + fp16 var_20090_to_fp16 = const()[name = string("op_20090_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_20090_to_fp16, x = input_491_cast_fp16)[name = string("normed_589_cast_fp16")]; + tensor normed_591_begin_0 = const()[name = string("normed_591_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_591_end_0 = const()[name = string("normed_591_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_591_end_mask_0 = const()[name = string("normed_591_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_591_cast_fp16 = slice_by_index(begin = normed_591_begin_0, end = normed_591_end_0, end_mask = normed_591_end_mask_0, x = normed_589_cast_fp16)[name = string("normed_591_cast_fp16")]; + tensor var_20109_to_fp16 = const()[name = string("op_20109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852908800)))]; + tensor attn_output_249_cast_fp16 = mul(x = normed_591_cast_fp16, y = var_20109_to_fp16)[name = string("attn_output_249_cast_fp16")]; + tensor hidden_states_399_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_399_cast_fp16")]; + int32 var_20122 = const()[name = string("op_20122"), val = int32(-1)]; + fp16 const_1046_promoted_to_fp16 = const()[name = string("const_1046_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_20124_cast_fp16 = mul(x = hidden_states_399_cast_fp16, y = const_1046_promoted_to_fp16)[name = string("op_20124_cast_fp16")]; + bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; + tensor input_493_cast_fp16 = concat(axis = var_20122, interleave = input_493_interleave_0, values = (hidden_states_399_cast_fp16, var_20124_cast_fp16))[name = string("input_493_cast_fp16")]; + tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; + fp16 var_20119_to_fp16 = const()[name = string("op_20119_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_20119_to_fp16, x = input_493_cast_fp16)[name = string("normed_593_cast_fp16")]; + tensor normed_595_begin_0 = const()[name = string("normed_595_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_595_end_0 = const()[name = string("normed_595_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_595_end_mask_0 = const()[name = string("normed_595_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_595_cast_fp16 = slice_by_index(begin = normed_595_begin_0, end = normed_595_end_0, end_mask = normed_595_end_mask_0, x = normed_593_cast_fp16)[name = string("normed_595_cast_fp16")]; + tensor var_20138_to_fp16 = const()[name = string("op_20138_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852911168)))]; + tensor x_397_cast_fp16 = mul(x = normed_595_cast_fp16, y = var_20138_to_fp16)[name = string("x_397_cast_fp16")]; + tensor var_20150 = const()[name = string("op_20150"), val = tensor([0, 2, 1])]; + tensor input_495_axes_0 = const()[name = string("input_495_axes_0"), val = tensor([2])]; + tensor var_20151_cast_fp16 = transpose(perm = var_20150, x = x_397_cast_fp16)[name = string("transpose_10")]; + tensor input_495_cast_fp16 = expand_dims(axes = input_495_axes_0, x = var_20151_cast_fp16)[name = string("input_495_cast_fp16")]; + string x_399_pad_type_0 = const()[name = string("x_399_pad_type_0"), val = string("valid")]; + tensor x_399_strides_0 = const()[name = string("x_399_strides_0"), val = tensor([1, 1])]; + tensor x_399_pad_0 = const()[name = string("x_399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_399_dilations_0 = const()[name = string("x_399_dilations_0"), val = tensor([1, 1])]; + int32 x_399_groups_0 = const()[name = string("x_399_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852913536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858885568))))[name = string("model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_399_cast_fp16 = conv(dilations = x_399_dilations_0, groups = x_399_groups_0, pad = x_399_pad_0, pad_type = x_399_pad_type_0, strides = x_399_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("x_399_cast_fp16")]; + string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; + tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; + tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; + int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859106816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865078848))))[name = string("model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_49_cast_fp16 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("b_49_cast_fp16")]; + string var_20176_mode_0 = const()[name = string("op_20176_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_20176_cast_fp16 = gelu(mode = var_20176_mode_0, x = x_399_cast_fp16)[name = string("op_20176_cast_fp16")]; + tensor input_497_cast_fp16 = mul(x = var_20176_cast_fp16, y = b_49_cast_fp16)[name = string("input_497_cast_fp16")]; + string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; + tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; + tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; + int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; + tensor model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865300096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871272128))))[name = string("model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_49_cast_fp16 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_497_cast_fp16)[name = string("e_49_cast_fp16")]; + tensor var_20184_axes_0 = const()[name = string("op_20184_axes_0"), val = tensor([2])]; + tensor var_20184_cast_fp16 = squeeze(axes = var_20184_axes_0, x = e_49_cast_fp16)[name = string("op_20184_cast_fp16")]; + tensor var_20185 = const()[name = string("op_20185"), val = tensor([0, 2, 1])]; + int32 var_20196 = const()[name = string("op_20196"), val = int32(-1)]; + fp16 const_1050_promoted_to_fp16 = const()[name = string("const_1050_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_401_cast_fp16 = transpose(perm = var_20185, x = var_20184_cast_fp16)[name = string("transpose_9")]; + tensor var_20198_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = const_1050_promoted_to_fp16)[name = string("op_20198_cast_fp16")]; + bool input_499_interleave_0 = const()[name = string("input_499_interleave_0"), val = bool(false)]; + tensor input_499_cast_fp16 = concat(axis = var_20196, interleave = input_499_interleave_0, values = (hidden_states_401_cast_fp16, var_20198_cast_fp16))[name = string("input_499_cast_fp16")]; + tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; + fp16 var_20193_to_fp16 = const()[name = string("op_20193_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_20193_to_fp16, x = input_499_cast_fp16)[name = string("normed_597_cast_fp16")]; + tensor normed_599_begin_0 = const()[name = string("normed_599_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_599_end_0 = const()[name = string("normed_599_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_599_end_mask_0 = const()[name = string("normed_599_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_599_cast_fp16 = slice_by_index(begin = normed_599_begin_0, end = normed_599_end_0, end_mask = normed_599_end_mask_0, x = normed_597_cast_fp16)[name = string("normed_599_cast_fp16")]; + tensor var_20212_to_fp16 = const()[name = string("op_20212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871309056)))]; + tensor hidden_states_403_cast_fp16 = mul(x = normed_599_cast_fp16, y = var_20212_to_fp16)[name = string("hidden_states_403_cast_fp16")]; + tensor hidden_states_405_cast_fp16 = add(x = hidden_states_399_cast_fp16, y = hidden_states_403_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; + int32 var_20266 = const()[name = string("op_20266"), val = int32(-1)]; + fp16 const_1055_promoted_to_fp16 = const()[name = string("const_1055_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_20268_cast_fp16 = mul(x = hidden_states_405_cast_fp16, y = const_1055_promoted_to_fp16)[name = string("op_20268_cast_fp16")]; + bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; + tensor input_501_cast_fp16 = concat(axis = var_20266, interleave = input_501_interleave_0, values = (hidden_states_405_cast_fp16, var_20268_cast_fp16))[name = string("input_501_cast_fp16")]; + tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; + fp16 var_20263_to_fp16 = const()[name = string("op_20263_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_20263_to_fp16, x = input_501_cast_fp16)[name = string("normed_601_cast_fp16")]; + tensor normed_603_begin_0 = const()[name = string("normed_603_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_603_end_0 = const()[name = string("normed_603_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_603_end_mask_0 = const()[name = string("normed_603_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_603_cast_fp16 = slice_by_index(begin = normed_603_begin_0, end = normed_603_end_0, end_mask = normed_603_end_mask_0, x = normed_601_cast_fp16)[name = string("normed_603_cast_fp16")]; + tensor var_20282_to_fp16 = const()[name = string("op_20282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871311424)))]; + tensor hidden_states_407_cast_fp16 = mul(x = normed_603_cast_fp16, y = var_20282_to_fp16)[name = string("hidden_states_407_cast_fp16")]; + tensor var_20293 = const()[name = string("op_20293"), val = tensor([0, 2, 1])]; + tensor var_20296_axes_0 = const()[name = string("op_20296_axes_0"), val = tensor([2])]; + tensor var_20294_cast_fp16 = transpose(perm = var_20293, x = hidden_states_407_cast_fp16)[name = string("transpose_8")]; + tensor var_20296_cast_fp16 = expand_dims(axes = var_20296_axes_0, x = var_20294_cast_fp16)[name = string("op_20296_cast_fp16")]; + string query_states_201_pad_type_0 = const()[name = string("query_states_201_pad_type_0"), val = string("valid")]; + tensor query_states_201_strides_0 = const()[name = string("query_states_201_strides_0"), val = tensor([1, 1])]; + tensor query_states_201_pad_0 = const()[name = string("query_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_201_dilations_0 = const()[name = string("query_states_201_dilations_0"), val = tensor([1, 1])]; + int32 query_states_201_groups_0 = const()[name = string("query_states_201_groups_0"), val = int32(1)]; + tensor query_states_201 = conv(dilations = query_states_201_dilations_0, groups = query_states_201_groups_0, pad = query_states_201_pad_0, pad_type = query_states_201_pad_type_0, strides = query_states_201_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_20296_cast_fp16)[name = string("query_states_201")]; + string key_states_251_pad_type_0 = const()[name = string("key_states_251_pad_type_0"), val = string("valid")]; + tensor key_states_251_strides_0 = const()[name = string("key_states_251_strides_0"), val = tensor([1, 1])]; + tensor key_states_251_pad_0 = const()[name = string("key_states_251_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_251_dilations_0 = const()[name = string("key_states_251_dilations_0"), val = tensor([1, 1])]; + int32 key_states_251_groups_0 = const()[name = string("key_states_251_groups_0"), val = int32(1)]; + tensor key_states_251 = conv(dilations = key_states_251_dilations_0, groups = key_states_251_groups_0, pad = key_states_251_pad_0, pad_type = key_states_251_pad_type_0, strides = key_states_251_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_20296_cast_fp16)[name = string("key_states_251")]; + string value_states_201_pad_type_0 = const()[name = string("value_states_201_pad_type_0"), val = string("valid")]; + tensor value_states_201_strides_0 = const()[name = string("value_states_201_strides_0"), val = tensor([1, 1])]; + tensor value_states_201_pad_0 = const()[name = string("value_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_201_dilations_0 = const()[name = string("value_states_201_dilations_0"), val = tensor([1, 1])]; + int32 value_states_201_groups_0 = const()[name = string("value_states_201_groups_0"), val = int32(1)]; + tensor value_states_201 = conv(dilations = value_states_201_dilations_0, groups = value_states_201_groups_0, pad = value_states_201_pad_0, pad_type = value_states_201_pad_type_0, strides = value_states_201_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_20296_cast_fp16)[name = string("value_states_201")]; + tensor var_20338 = const()[name = string("op_20338"), val = tensor([1, 4, 256, 64])]; + tensor var_20339 = reshape(shape = var_20338, x = query_states_201)[name = string("op_20339")]; + tensor var_20344 = const()[name = string("op_20344"), val = tensor([0, 1, 3, 2])]; + tensor var_20349 = const()[name = string("op_20349"), val = tensor([1, 1, 256, 64])]; + tensor var_20350 = reshape(shape = var_20349, x = key_states_251)[name = string("op_20350")]; + tensor var_20355 = const()[name = string("op_20355"), val = tensor([0, 1, 3, 2])]; + tensor var_20360 = const()[name = string("op_20360"), val = tensor([1, 1, 256, 64])]; + tensor var_20361 = reshape(shape = var_20360, x = value_states_201)[name = string("op_20361")]; + tensor var_20366 = const()[name = string("op_20366"), val = tensor([0, 1, 3, 2])]; + int32 var_20377 = const()[name = string("op_20377"), val = int32(-1)]; + fp16 const_1060_promoted = const()[name = string("const_1060_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_409 = transpose(perm = var_20344, x = var_20339)[name = string("transpose_7")]; + tensor var_20379 = mul(x = hidden_states_409, y = const_1060_promoted)[name = string("op_20379")]; + bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; + tensor input_505 = concat(axis = var_20377, interleave = input_505_interleave_0, values = (hidden_states_409, var_20379))[name = string("input_505")]; + tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; + fp16 var_20374_to_fp16 = const()[name = string("op_20374_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_20374_to_fp16, x = input_505)[name = string("normed_605_cast_fp16")]; + tensor normed_607_begin_0 = const()[name = string("normed_607_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_607_end_0 = const()[name = string("normed_607_end_0"), val = tensor([1, 4, 64, 256])]; + tensor normed_607_end_mask_0 = const()[name = string("normed_607_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_607 = slice_by_index(begin = normed_607_begin_0, end = normed_607_end_0, end_mask = normed_607_end_mask_0, x = normed_605_cast_fp16)[name = string("normed_607")]; + tensor var_20393_to_fp16 = const()[name = string("op_20393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871313792)))]; + tensor q_cast_fp16 = mul(x = normed_607, y = var_20393_to_fp16)[name = string("q_cast_fp16")]; + int32 var_20404 = const()[name = string("op_20404"), val = int32(-1)]; + fp16 const_1064_promoted = const()[name = string("const_1064_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_411 = transpose(perm = var_20355, x = var_20350)[name = string("transpose_6")]; + tensor var_20406 = mul(x = hidden_states_411, y = const_1064_promoted)[name = string("op_20406")]; + bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; + tensor input_507 = concat(axis = var_20404, interleave = input_507_interleave_0, values = (hidden_states_411, var_20406))[name = string("input_507")]; + tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; + fp16 var_20401_to_fp16 = const()[name = string("op_20401_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_20401_to_fp16, x = input_507)[name = string("normed_609_cast_fp16")]; + tensor normed_611_begin_0 = const()[name = string("normed_611_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_611_end_0 = const()[name = string("normed_611_end_0"), val = tensor([1, 1, 64, 256])]; + tensor normed_611_end_mask_0 = const()[name = string("normed_611_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_611 = slice_by_index(begin = normed_611_begin_0, end = normed_611_end_0, end_mask = normed_611_end_mask_0, x = normed_609_cast_fp16)[name = string("normed_611")]; + tensor var_20420_to_fp16 = const()[name = string("op_20420_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871314368)))]; + tensor k_cast_fp16 = mul(x = normed_611, y = var_20420_to_fp16)[name = string("k_cast_fp16")]; + tensor var_20434_cast_fp16 = mul(x = q_cast_fp16, y = cos_5)[name = string("op_20434_cast_fp16")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 4, 64, 128])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_cast_fp16)[name = string("x1_101_cast_fp16")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 4, 64, 256])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_cast_fp16)[name = string("x2_101_cast_fp16")]; + fp16 const_1070_promoted_to_fp16 = const()[name = string("const_1070_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_20455_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_1070_promoted_to_fp16)[name = string("op_20455_cast_fp16")]; + int32 var_20457 = const()[name = string("op_20457"), val = int32(-1)]; + bool var_20458_interleave_0 = const()[name = string("op_20458_interleave_0"), val = bool(false)]; + tensor var_20458_cast_fp16 = concat(axis = var_20457, interleave = var_20458_interleave_0, values = (var_20455_cast_fp16, x1_101_cast_fp16))[name = string("op_20458_cast_fp16")]; + tensor var_20459_cast_fp16 = mul(x = var_20458_cast_fp16, y = sin_5)[name = string("op_20459_cast_fp16")]; + tensor query_states_203_cast_fp16 = add(x = var_20434_cast_fp16, y = var_20459_cast_fp16)[name = string("query_states_203_cast_fp16")]; + tensor var_20462_cast_fp16 = mul(x = k_cast_fp16, y = cos_5)[name = string("op_20462_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 64, 128])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 64, 256])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; + fp16 const_1073_promoted_to_fp16 = const()[name = string("const_1073_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_20483_cast_fp16 = mul(x = x2_cast_fp16, y = const_1073_promoted_to_fp16)[name = string("op_20483_cast_fp16")]; + int32 var_20485 = const()[name = string("op_20485"), val = int32(-1)]; + bool var_20486_interleave_0 = const()[name = string("op_20486_interleave_0"), val = bool(false)]; + tensor var_20486_cast_fp16 = concat(axis = var_20485, interleave = var_20486_interleave_0, values = (var_20483_cast_fp16, x1_cast_fp16))[name = string("op_20486_cast_fp16")]; + tensor var_20487_cast_fp16 = mul(x = var_20486_cast_fp16, y = sin_5)[name = string("op_20487_cast_fp16")]; + tensor key_states_253_cast_fp16 = add(x = var_20462_cast_fp16, y = var_20487_cast_fp16)[name = string("key_states_253_cast_fp16")]; + tensor key_slice_begin_0 = const()[name = string("key_slice_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor key_slice_end_0 = const()[name = string("key_slice_end_0"), val = tensor([22, 1, 512, 256])]; + tensor key_slice_end_mask_0 = const()[name = string("key_slice_end_mask_0"), val = tensor([false, true, true, true])]; + tensor key_slice_cast_fp16 = slice_by_index(begin = key_slice_begin_0, end = key_slice_end_0, end_mask = key_slice_end_mask_0, x = coreml_update_state_101)[name = string("key_slice_cast_fp16")]; + tensor var_20524_begin_0 = const()[name = string("op_20524_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_20524_end_0 = const()[name = string("op_20524_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_20524_end_mask_0 = const()[name = string("op_20524_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_20524_cast_fp16 = slice_by_index(begin = var_20524_begin_0, end = var_20524_end_0, end_mask = var_20524_end_mask_0, x = key_slice_cast_fp16)[name = string("op_20524_cast_fp16")]; + int32 var_20551 = const()[name = string("op_20551"), val = int32(2)]; + bool shifted_key_interleave_0 = const()[name = string("shifted_key_interleave_0"), val = bool(false)]; + tensor shifted_key_cast_fp16 = concat(axis = var_20551, interleave = shifted_key_interleave_0, values = (var_20524_cast_fp16, key_states_253_cast_fp16))[name = string("shifted_key_cast_fp16")]; + tensor concat_350 = const()[name = string("concat_350"), val = tensor([21, 0, 0, 0])]; + tensor concat_351 = const()[name = string("concat_351"), val = tensor([22, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_350, begin_mask = model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0, end = concat_351, end_mask = model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_43_stride_0, update = shifted_key_cast_fp16, x = coreml_update_state_101)[name = string("model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_206_write_state")]; + tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_206")]; + tensor value_slice_begin_0 = const()[name = string("value_slice_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor value_slice_end_0 = const()[name = string("value_slice_end_0"), val = tensor([1, 1, 512, 256])]; + tensor value_slice_end_mask_0 = const()[name = string("value_slice_end_mask_0"), val = tensor([true, true, true, true])]; + tensor value_slice_cast_fp16 = slice_by_index(begin = value_slice_begin_0, end = value_slice_end_0, end_mask = value_slice_end_mask_0, x = coreml_update_state_102)[name = string("value_slice_cast_fp16")]; + tensor var_20594_begin_0 = const()[name = string("op_20594_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_20594_end_0 = const()[name = string("op_20594_end_0"), val = tensor([1, 1, 1, 256])]; + tensor var_20594_end_mask_0 = const()[name = string("op_20594_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_20594_cast_fp16 = slice_by_index(begin = var_20594_begin_0, end = var_20594_end_0, end_mask = var_20594_end_mask_0, x = value_slice_cast_fp16)[name = string("op_20594_cast_fp16")]; + int32 var_20621 = const()[name = string("op_20621"), val = int32(2)]; + bool shifted_value_interleave_0 = const()[name = string("shifted_value_interleave_0"), val = bool(false)]; + tensor value_states_203 = transpose(perm = var_20366, x = var_20361)[name = string("transpose_5")]; + tensor shifted_value_cast_fp16 = concat(axis = var_20621, interleave = shifted_value_interleave_0, values = (var_20594_cast_fp16, value_states_203))[name = string("shifted_value_cast_fp16")]; + tensor concat_352 = const()[name = string("concat_352"), val = tensor([43, 0, 0, 0])]; + tensor concat_353 = const()[name = string("concat_353"), val = tensor([44, 0, 0, 0])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, true, true])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_352, begin_mask = model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0, end = concat_353, end_mask = model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_44_stride_0, update = shifted_value_cast_fp16, x = coreml_update_state_102)[name = string("model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16")]; + write_state(data = model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_207_write_state")]; + tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_207")]; + tensor var_20649_begin_0 = const()[name = string("op_20649_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_20649_end_0 = const()[name = string("op_20649_end_0"), val = tensor([22, 1, 512, 256])]; + tensor var_20649_end_mask_0 = const()[name = string("op_20649_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_20649_cast_fp16 = slice_by_index(begin = var_20649_begin_0, end = var_20649_end_0, end_mask = var_20649_end_mask_0, x = coreml_update_state_103)[name = string("op_20649_cast_fp16")]; + tensor var_20656_begin_0 = const()[name = string("op_20656_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor var_20656_end_0 = const()[name = string("op_20656_end_0"), val = tensor([1, 1, 512, 256])]; + tensor var_20656_end_mask_0 = const()[name = string("op_20656_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_20656_cast_fp16 = slice_by_index(begin = var_20656_begin_0, end = var_20656_end_0, end_mask = var_20656_end_mask_0, x = coreml_update_state_103)[name = string("op_20656_cast_fp16")]; + tensor var_20695 = const()[name = string("op_20695"), val = tensor([1, 4, 1, 1])]; + tensor x_405_cast_fp16 = tile(reps = var_20695, x = var_20649_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_20715 = const()[name = string("op_20715"), val = tensor([1, 4, 1, 1])]; + tensor x_411_cast_fp16 = tile(reps = var_20715, x = var_20656_cast_fp16)[name = string("x_411_cast_fp16")]; + bool var_20742_transpose_x_0 = const()[name = string("op_20742_transpose_x_0"), val = bool(false)]; + bool var_20742_transpose_y_0 = const()[name = string("op_20742_transpose_y_0"), val = bool(true)]; + tensor var_20742 = matmul(transpose_x = var_20742_transpose_x_0, transpose_y = var_20742_transpose_y_0, x = query_states_203_cast_fp16, y = x_405_cast_fp16)[name = string("op_20742")]; + fp16 var_20743_to_fp16 = const()[name = string("op_20743_to_fp16"), val = fp16(0x1p-4)]; + tensor attn_weights_101_cast_fp16 = mul(x = var_20742, y = var_20743_to_fp16)[name = string("attn_weights_101_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_101_cast_fp16, y = mask_slice_1)[name = string("attn_weights_cast_fp16")]; + int32 var_20778 = const()[name = string("op_20778"), val = int32(-1)]; + tensor var_20780_cast_fp16 = softmax(axis = var_20778, x = attn_weights_cast_fp16)[name = string("op_20780_cast_fp16")]; + tensor concat_358 = const()[name = string("concat_358"), val = tensor([4, 64, 512])]; + tensor reshape_75_cast_fp16 = reshape(shape = concat_358, x = var_20780_cast_fp16)[name = string("reshape_75_cast_fp16")]; + tensor concat_359 = const()[name = string("concat_359"), val = tensor([4, 512, 256])]; + tensor reshape_76_cast_fp16 = reshape(shape = concat_359, x = x_411_cast_fp16)[name = string("reshape_76_cast_fp16")]; + bool matmul_25_transpose_x_0 = const()[name = string("matmul_25_transpose_x_0"), val = bool(false)]; + bool matmul_25_transpose_y_0 = const()[name = string("matmul_25_transpose_y_0"), val = bool(false)]; + tensor matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_0, transpose_y = matmul_25_transpose_y_0, x = reshape_75_cast_fp16, y = reshape_76_cast_fp16)[name = string("matmul_25_cast_fp16")]; + tensor concat_363 = const()[name = string("concat_363"), val = tensor([1, 4, 64, 256])]; + tensor reshape_77_cast_fp16 = reshape(shape = concat_363, x = matmul_25_cast_fp16)[name = string("reshape_77_cast_fp16")]; + tensor var_20792_perm_0 = const()[name = string("op_20792_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_20811 = const()[name = string("op_20811"), val = tensor([1, 64, 1024])]; + tensor var_20792_cast_fp16 = transpose(perm = var_20792_perm_0, x = reshape_77_cast_fp16)[name = string("transpose_4")]; + tensor attn_output_255_cast_fp16 = reshape(shape = var_20811, x = var_20792_cast_fp16)[name = string("attn_output_255_cast_fp16")]; + tensor var_20816 = const()[name = string("op_20816"), val = tensor([0, 2, 1])]; + string var_20832_pad_type_0 = const()[name = string("op_20832_pad_type_0"), val = string("valid")]; + int32 var_20832_groups_0 = const()[name = string("op_20832_groups_0"), val = int32(1)]; + tensor var_20832_strides_0 = const()[name = string("op_20832_strides_0"), val = tensor([1])]; + tensor var_20832_pad_0 = const()[name = string("op_20832_pad_0"), val = tensor([0, 0])]; + tensor var_20832_dilations_0 = const()[name = string("op_20832_dilations_0"), val = tensor([1])]; + tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871314944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872199744))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_20817_cast_fp16 = transpose(perm = var_20816, x = attn_output_255_cast_fp16)[name = string("transpose_3")]; + tensor var_20832_cast_fp16 = conv(dilations = var_20832_dilations_0, groups = var_20832_groups_0, pad = var_20832_pad_0, pad_type = var_20832_pad_type_0, strides = var_20832_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_20817_cast_fp16)[name = string("op_20832_cast_fp16")]; + tensor var_20836 = const()[name = string("op_20836"), val = tensor([0, 2, 1])]; + int32 var_20847 = const()[name = string("op_20847"), val = int32(-1)]; + fp16 const_1084_promoted_to_fp16 = const()[name = string("const_1084_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_413_cast_fp16 = transpose(perm = var_20836, x = var_20832_cast_fp16)[name = string("transpose_2")]; + tensor var_20849_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = const_1084_promoted_to_fp16)[name = string("op_20849_cast_fp16")]; + bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; + tensor input_511_cast_fp16 = concat(axis = var_20847, interleave = input_511_interleave_0, values = (hidden_states_413_cast_fp16, var_20849_cast_fp16))[name = string("input_511_cast_fp16")]; + tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; + fp16 var_20844_to_fp16 = const()[name = string("op_20844_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_20844_to_fp16, x = input_511_cast_fp16)[name = string("normed_613_cast_fp16")]; + tensor normed_615_begin_0 = const()[name = string("normed_615_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_615_end_0 = const()[name = string("normed_615_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_615_end_mask_0 = const()[name = string("normed_615_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_615_cast_fp16 = slice_by_index(begin = normed_615_begin_0, end = normed_615_end_0, end_mask = normed_615_end_mask_0, x = normed_613_cast_fp16)[name = string("normed_615_cast_fp16")]; + tensor var_20863_to_fp16 = const()[name = string("op_20863_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872236672)))]; + tensor attn_output_cast_fp16 = mul(x = normed_615_cast_fp16, y = var_20863_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor hidden_states_415_cast_fp16 = add(x = hidden_states_405_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_415_cast_fp16")]; + int32 var_20876 = const()[name = string("op_20876"), val = int32(-1)]; + fp16 const_1088_promoted_to_fp16 = const()[name = string("const_1088_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_20878_cast_fp16 = mul(x = hidden_states_415_cast_fp16, y = const_1088_promoted_to_fp16)[name = string("op_20878_cast_fp16")]; + bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; + tensor input_513_cast_fp16 = concat(axis = var_20876, interleave = input_513_interleave_0, values = (hidden_states_415_cast_fp16, var_20878_cast_fp16))[name = string("input_513_cast_fp16")]; + tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; + fp16 var_20873_to_fp16 = const()[name = string("op_20873_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_20873_to_fp16, x = input_513_cast_fp16)[name = string("normed_617_cast_fp16")]; + tensor normed_619_begin_0 = const()[name = string("normed_619_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_619_end_0 = const()[name = string("normed_619_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_619_end_mask_0 = const()[name = string("normed_619_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_619_cast_fp16 = slice_by_index(begin = normed_619_begin_0, end = normed_619_end_0, end_mask = normed_619_end_mask_0, x = normed_617_cast_fp16)[name = string("normed_619_cast_fp16")]; + tensor var_20892_to_fp16 = const()[name = string("op_20892_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872239040)))]; + tensor x_413_cast_fp16 = mul(x = normed_619_cast_fp16, y = var_20892_to_fp16)[name = string("x_413_cast_fp16")]; + tensor var_20904 = const()[name = string("op_20904"), val = tensor([0, 2, 1])]; + tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; + tensor var_20905_cast_fp16 = transpose(perm = var_20904, x = x_413_cast_fp16)[name = string("transpose_1")]; + tensor input_515_cast_fp16 = expand_dims(axes = input_515_axes_0, x = var_20905_cast_fp16)[name = string("input_515_cast_fp16")]; + string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; + tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; + tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; + int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872241408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878213440))))[name = string("model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; + tensor x_cast_fp16 = conv(dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("x_cast_fp16")]; + string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; + tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; + tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; + int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878434688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884406720))))[name = string("model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized")]; + tensor b_cast_fp16 = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("b_cast_fp16")]; + string var_20930_mode_0 = const()[name = string("op_20930_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_20930_cast_fp16 = gelu(mode = var_20930_mode_0, x = x_cast_fp16)[name = string("op_20930_cast_fp16")]; + tensor input_517_cast_fp16 = mul(x = var_20930_cast_fp16, y = b_cast_fp16)[name = string("input_517_cast_fp16")]; + string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; + tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; + tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; + int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; + tensor model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884627968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890600000))))[name = string("model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized")]; + tensor e_cast_fp16 = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("e_cast_fp16")]; + tensor var_20938_axes_0 = const()[name = string("op_20938_axes_0"), val = tensor([2])]; + tensor var_20938_cast_fp16 = squeeze(axes = var_20938_axes_0, x = e_cast_fp16)[name = string("op_20938_cast_fp16")]; + tensor var_20939 = const()[name = string("op_20939"), val = tensor([0, 2, 1])]; + int32 var_20950 = const()[name = string("op_20950"), val = int32(-1)]; + fp16 const_1092_promoted_to_fp16 = const()[name = string("const_1092_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor hidden_states_417_cast_fp16 = transpose(perm = var_20939, x = var_20938_cast_fp16)[name = string("transpose_0")]; + tensor var_20952_cast_fp16 = mul(x = hidden_states_417_cast_fp16, y = const_1092_promoted_to_fp16)[name = string("op_20952_cast_fp16")]; + bool input_519_interleave_0 = const()[name = string("input_519_interleave_0"), val = bool(false)]; + tensor input_519_cast_fp16 = concat(axis = var_20950, interleave = input_519_interleave_0, values = (hidden_states_417_cast_fp16, var_20952_cast_fp16))[name = string("input_519_cast_fp16")]; + tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; + fp16 var_20947_to_fp16 = const()[name = string("op_20947_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_20947_to_fp16, x = input_519_cast_fp16)[name = string("normed_621_cast_fp16")]; + tensor normed_623_begin_0 = const()[name = string("normed_623_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_623_end_0 = const()[name = string("normed_623_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_623_end_mask_0 = const()[name = string("normed_623_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_623_cast_fp16 = slice_by_index(begin = normed_623_begin_0, end = normed_623_end_0, end_mask = normed_623_end_mask_0, x = normed_621_cast_fp16)[name = string("normed_623_cast_fp16")]; + tensor var_20966_to_fp16 = const()[name = string("op_20966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890636928)))]; + tensor hidden_states_419_cast_fp16 = mul(x = normed_623_cast_fp16, y = var_20966_to_fp16)[name = string("hidden_states_419_cast_fp16")]; + tensor hidden_states_421_cast_fp16 = add(x = hidden_states_415_cast_fp16, y = hidden_states_419_cast_fp16)[name = string("hidden_states_421_cast_fp16")]; + int32 var_20979 = const()[name = string("op_20979"), val = int32(-1)]; + fp16 const_1096_promoted_to_fp16 = const()[name = string("const_1096_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_20981_cast_fp16 = mul(x = hidden_states_421_cast_fp16, y = const_1096_promoted_to_fp16)[name = string("op_20981_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_20979, interleave = input_interleave_0, values = (hidden_states_421_cast_fp16, var_20981_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_625_axes_0 = const()[name = string("normed_625_axes_0"), val = tensor([-1])]; + fp16 var_20976_to_fp16 = const()[name = string("op_20976_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_625_cast_fp16 = layer_norm(axes = normed_625_axes_0, epsilon = var_20976_to_fp16, x = input_cast_fp16)[name = string("normed_625_cast_fp16")]; + tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 64, 1152])]; + tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_625_cast_fp16)[name = string("normed_cast_fp16")]; + tensor var_20995_to_fp16 = const()[name = string("op_20995_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890639296)))]; + tensor hidden_states_cast_fp16 = mul(x = normed_cast_fp16, y = var_20995_to_fp16)[name = string("hidden_states_cast_fp16")]; + tensor var_21006_begin_0 = const()[name = string("op_21006_begin_0"), val = tensor([0, 0, 0])]; + tensor var_21006_end_0 = const()[name = string("op_21006_end_0"), val = tensor([1, 1, 1152])]; + tensor var_21006_end_mask_0 = const()[name = string("op_21006_end_mask_0"), val = tensor([true, false, true])]; + tensor output_hidden_states = slice_by_index(begin = var_21006_begin_0, end = var_21006_end_0, end_mask = var_21006_end_mask_0, x = hidden_states_cast_fp16)[name = string("op_21006_cast_fp16")]; + tensor current_pos_tmp = identity(x = current_pos)[name = string("current_pos_tmp")]; + } -> (output_hidden_states); +} \ No newline at end of file