program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3500.11.1"}, {"coremlc-version", "3500.21.1"}})] { func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5243008))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5259456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6570240))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6574400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7885184))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7889344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20341248))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20380224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32832128))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32871104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45323008))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45333312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50576256))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50592704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51903488))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51907648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53218432))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53222592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65674496))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65713472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78165376))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78204352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90656256))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90666560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95909504))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95925952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97236736))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97240896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98551680))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98555840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111007744))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111046720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123498624))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123537600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135989504))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135999808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141242752))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141259200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142569984))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142574144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143884928))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143889088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156340992))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156379968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168831872))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168870848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181322752))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181333056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186576000))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186592448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187903232))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187907392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189218176))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189222336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201674240))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201713216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214165120))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214204096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226656000))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226666304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231909248))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231925696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233236480))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233240640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234551424))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234555584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247007488))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247046464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259498368))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259537344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271989248))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271999552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277242496))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277258944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278569728))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278573888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279884672))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279888832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292340736))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292379712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304831616))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304870592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317322496))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317332800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322575744))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322592192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323902976))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323907136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325217920))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325222080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337673984))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337712960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350164864))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350203840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362655744))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362666048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367908992))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367925440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369236224))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369240384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370551168))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370555328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383007232))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383046208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395498112))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395537088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407988992))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407999296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413242240))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413258688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414569472))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414573632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415884416))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415888576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428340480))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428379456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440831360))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440870336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453322240))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453332544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458575488))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458591936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459902720))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459906880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461217664))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461221824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473673728))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473712704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486164608))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486203584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498655488))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498665792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503908736))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503925184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505235968))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505240128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506550912))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506555072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519006976))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519045952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531497856))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531536832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543988736))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543999040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549241984))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549258432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550569216))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550573376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551884160))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551888320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564340224))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564379200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576831104))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576870080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589321984))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589332288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594575232))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594591680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595902464))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595906624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597217408))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597221568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609673472))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609712448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622164352))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622203328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634655232))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634665536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639908480))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639924928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641235712))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641239872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642550656))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642554816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655006720))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655045696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667497600))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667536576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679988480))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685241728))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685258176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686568960))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686573120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687883904))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687888064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700339968))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700378944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712830848))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712869824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725321728))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725332032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730574976))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730591424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731902208))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731906368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(733217152))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(733221312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745673216))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745712192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758164096))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")]; tensor model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758203072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770654976))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770665280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775908224))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775924672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(777235456))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(777239616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778550400))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778554560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791006464))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791045440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803497344))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")]; tensor model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803536320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815988224))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(65536)]; tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_973_axis_0 = const()[name = string("op_973_axis_0"), val = int32(1)]; int32 var_973_batch_dims_0 = const()[name = string("op_973_batch_dims_0"), val = int32(0)]; bool var_973_validate_indices_0 = const()[name = string("op_973_validate_indices_0"), val = bool(false)]; tensor var_965_to_fp16 = const()[name = string("op_965_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815998528)))]; tensor var_973_cast_fp16 = gather(axis = var_973_axis_0, batch_dims = var_973_batch_dims_0, indices = select_0, validate_indices = var_973_validate_indices_0, x = var_965_to_fp16)[name = string("op_973_cast_fp16")]; tensor var_978 = const()[name = string("op_978"), val = tensor([1, 1, 1, -1])]; tensor sin_1_cast_fp16 = reshape(shape = var_978, x = var_973_cast_fp16)[name = string("sin_1_cast_fp16")]; int32 var_988_axis_0 = const()[name = string("op_988_axis_0"), val = int32(1)]; int32 var_988_batch_dims_0 = const()[name = string("op_988_batch_dims_0"), val = int32(0)]; bool var_988_validate_indices_0 = const()[name = string("op_988_validate_indices_0"), val = bool(false)]; tensor var_980_to_fp16 = const()[name = string("op_980_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832775808)))]; tensor var_988_cast_fp16 = gather(axis = var_988_axis_0, batch_dims = var_988_batch_dims_0, indices = select_0, validate_indices = var_988_validate_indices_0, x = var_980_to_fp16)[name = string("op_988_cast_fp16")]; tensor var_993 = const()[name = string("op_993"), val = tensor([1, 1, 1, -1])]; tensor cos_1_cast_fp16 = reshape(shape = var_993, x = var_988_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_1020_axes_0 = const()[name = string("op_1020_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849553088)))]; fp16 var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1020_cast_fp16 = layer_norm(axes = var_1020_axes_0, epsilon = var_1008_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1026 = const()[name = string("op_1026"), val = tensor([0, 2, 1])]; tensor var_1029_axes_0 = const()[name = string("op_1029_axes_0"), val = tensor([2])]; tensor var_1027 = transpose(perm = var_1026, x = var_1020_cast_fp16)[name = string("transpose_107")]; tensor var_1029 = expand_dims(axes = var_1029_axes_0, x = var_1027)[name = string("op_1029")]; string var_1045_pad_type_0 = const()[name = string("op_1045_pad_type_0"), val = string("valid")]; tensor var_1045_strides_0 = const()[name = string("op_1045_strides_0"), val = tensor([1, 1])]; tensor var_1045_pad_0 = const()[name = string("op_1045_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1045_dilations_0 = const()[name = string("op_1045_dilations_0"), val = tensor([1, 1])]; int32 var_1045_groups_0 = const()[name = string("op_1045_groups_0"), val = int32(1)]; tensor var_1045 = conv(dilations = var_1045_dilations_0, groups = var_1045_groups_0, pad = var_1045_pad_0, pad_type = var_1045_pad_type_0, strides = var_1045_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1029)[name = string("op_1045")]; tensor var_1050 = const()[name = string("op_1050"), val = tensor([1, 32, 1, 128])]; tensor var_1051 = reshape(shape = var_1050, x = var_1045)[name = string("op_1051")]; string var_1067_pad_type_0 = const()[name = string("op_1067_pad_type_0"), val = string("valid")]; tensor var_1067_strides_0 = const()[name = string("op_1067_strides_0"), val = tensor([1, 1])]; tensor var_1067_pad_0 = const()[name = string("op_1067_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1067_dilations_0 = const()[name = string("op_1067_dilations_0"), val = tensor([1, 1])]; int32 var_1067_groups_0 = const()[name = string("op_1067_groups_0"), val = int32(1)]; tensor var_1067 = conv(dilations = var_1067_dilations_0, groups = var_1067_groups_0, pad = var_1067_pad_0, pad_type = var_1067_pad_type_0, strides = var_1067_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1029)[name = string("op_1067")]; tensor var_1072 = const()[name = string("op_1072"), val = tensor([1, 8, 1, 128])]; tensor var_1073 = reshape(shape = var_1072, x = var_1067)[name = string("op_1073")]; string var_1089_pad_type_0 = const()[name = string("op_1089_pad_type_0"), val = string("valid")]; tensor var_1089_strides_0 = const()[name = string("op_1089_strides_0"), val = tensor([1, 1])]; tensor var_1089_pad_0 = const()[name = string("op_1089_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1089_dilations_0 = const()[name = string("op_1089_dilations_0"), val = tensor([1, 1])]; int32 var_1089_groups_0 = const()[name = string("op_1089_groups_0"), val = int32(1)]; tensor var_1089 = conv(dilations = var_1089_dilations_0, groups = var_1089_groups_0, pad = var_1089_pad_0, pad_type = var_1089_pad_type_0, strides = var_1089_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1029)[name = string("op_1089")]; tensor var_1094 = const()[name = string("op_1094"), val = tensor([1, 8, 1, 128])]; tensor var_1095 = reshape(shape = var_1094, x = var_1089)[name = string("op_1095")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = var_1051)[name = string("mean_3")]; tensor input_5 = sub(x = var_1051, y = mean_3)[name = string("input_5")]; tensor var_1116_axes_0 = const()[name = string("op_1116_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558272)))]; fp16 var_1104_to_fp16 = const()[name = string("op_1104_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1116_cast_fp16 = layer_norm(axes = var_1116_axes_0, epsilon = var_1104_to_fp16, gamma = model_model_layers_0_self_attn_q_norm_weight_to_fp16, x = input_5)[name = string("op_1116_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = var_1073)[name = string("mean_5")]; tensor input_7 = sub(x = var_1073, y = mean_5)[name = string("input_7")]; tensor var_1134_axes_0 = const()[name = string("op_1134_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558592)))]; fp16 var_1122_to_fp16 = const()[name = string("op_1122_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1134_cast_fp16 = layer_norm(axes = var_1134_axes_0, epsilon = var_1122_to_fp16, gamma = model_model_layers_0_self_attn_k_norm_weight_to_fp16, x = input_7)[name = string("op_1134_cast_fp16")]; tensor var_1137 = mul(x = var_1116_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1137")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_1116_cast_fp16)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_1116_cast_fp16)[name = string("x2_1")]; fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; tensor var_1158 = mul(x = x2_1, y = const_5_promoted)[name = string("op_1158")]; int32 var_1160 = const()[name = string("op_1160"), val = int32(-1)]; bool var_1161_interleave_0 = const()[name = string("op_1161_interleave_0"), val = bool(false)]; tensor var_1161 = concat(axis = var_1160, interleave = var_1161_interleave_0, values = (var_1158, x1_1))[name = string("op_1161")]; tensor var_1162 = mul(x = var_1161, y = sin_1_cast_fp16)[name = string("op_1162")]; tensor query_states_1 = add(x = var_1137, y = var_1162)[name = string("query_states_1")]; tensor var_1165 = mul(x = var_1134_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1165")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_1134_cast_fp16)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_1134_cast_fp16)[name = string("x2_3")]; fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; tensor var_1186 = mul(x = x2_3, y = const_8_promoted)[name = string("op_1186")]; int32 var_1188 = const()[name = string("op_1188"), val = int32(-1)]; bool var_1189_interleave_0 = const()[name = string("op_1189_interleave_0"), val = bool(false)]; tensor var_1189 = concat(axis = var_1188, interleave = var_1189_interleave_0, values = (var_1186, x1_3))[name = string("op_1189")]; tensor var_1190 = mul(x = var_1189, y = sin_1_cast_fp16)[name = string("op_1190")]; tensor key_states_1 = add(x = var_1165, y = var_1190)[name = string("key_states_1")]; int32 var_1194 = const()[name = string("op_1194"), val = int32(1)]; tensor var_1195 = add(x = current_pos, y = var_1194)[name = string("op_1195")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1195, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = key_states_1, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; tensor coreml_update_state_36 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([36])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([37])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1195, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_1095, x = coreml_update_state_36)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; tensor coreml_update_state_37 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; tensor var_1245_begin_0 = const()[name = string("op_1245_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1245_end_0 = const()[name = string("op_1245_end_0"), val = tensor([1, 8, 1024, 128])]; tensor var_1245_end_mask_0 = const()[name = string("op_1245_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1245_cast_fp16 = slice_by_index(begin = var_1245_begin_0, end = var_1245_end_0, end_mask = var_1245_end_mask_0, x = coreml_update_state_37)[name = string("op_1245_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_1245_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_1252_begin_0 = const()[name = string("op_1252_begin_0"), val = tensor([36, 0, 0, 0])]; tensor var_1252_end_0 = const()[name = string("op_1252_end_0"), val = tensor([37, 8, 1024, 128])]; tensor var_1252_end_mask_0 = const()[name = string("op_1252_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = coreml_update_state_37)[name = string("op_1252_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_1252_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_7_axes_0 = const()[name = string("x_7_axes_0"), val = tensor([1])]; tensor x_7_cast_fp16 = expand_dims(axes = x_7_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_1289 = const()[name = string("op_1289"), val = tensor([1, 4, 1, 1])]; tensor x_9_cast_fp16 = tile(reps = var_1289, x = x_7_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_1301 = const()[name = string("op_1301"), val = tensor([1, -1, 1024, 128])]; tensor key_states_3_cast_fp16 = reshape(shape = var_1301, x = x_9_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor x_13_axes_0 = const()[name = string("x_13_axes_0"), val = tensor([1])]; tensor x_13_cast_fp16 = expand_dims(axes = x_13_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_1309 = const()[name = string("op_1309"), val = tensor([1, 4, 1, 1])]; tensor x_15_cast_fp16 = tile(reps = var_1309, x = x_13_cast_fp16)[name = string("x_15_cast_fp16")]; tensor var_1321 = const()[name = string("op_1321"), val = tensor([1, -1, 1024, 128])]; tensor value_states_3_cast_fp16 = reshape(shape = var_1321, x = x_15_cast_fp16)[name = string("value_states_3_cast_fp16")]; bool var_1336_transpose_x_1 = const()[name = string("op_1336_transpose_x_1"), val = bool(false)]; bool var_1336_transpose_y_1 = const()[name = string("op_1336_transpose_y_1"), val = bool(true)]; tensor var_1336 = matmul(transpose_x = var_1336_transpose_x_1, transpose_y = var_1336_transpose_y_1, x = query_states_1, y = key_states_3_cast_fp16)[name = string("op_1336")]; fp16 var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_1336, y = var_1337_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_1372 = const()[name = string("op_1372"), val = int32(-1)]; tensor attn_weights_5_cast_fp16 = softmax(axis = var_1372, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_5_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_1383_perm_0 = const()[name = string("op_1383_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1387 = const()[name = string("op_1387"), val = tensor([1, 1, 4096])]; tensor var_1383_cast_fp16 = transpose(perm = var_1383_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_106")]; tensor attn_output_5_cast_fp16 = reshape(shape = var_1387, x = var_1383_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_1392 = const()[name = string("op_1392"), val = tensor([0, 2, 1])]; string var_1408_pad_type_0 = const()[name = string("op_1408_pad_type_0"), val = string("valid")]; int32 var_1408_groups_0 = const()[name = string("op_1408_groups_0"), val = int32(1)]; tensor var_1408_strides_0 = const()[name = string("op_1408_strides_0"), val = tensor([1])]; tensor var_1408_pad_0 = const()[name = string("op_1408_pad_0"), val = tensor([0, 0])]; tensor var_1408_dilations_0 = const()[name = string("op_1408_dilations_0"), val = tensor([1])]; tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854801856))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1393_cast_fp16 = transpose(perm = var_1392, x = attn_output_5_cast_fp16)[name = string("transpose_105")]; tensor var_1408_cast_fp16 = conv(dilations = var_1408_dilations_0, groups = var_1408_groups_0, pad = var_1408_pad_0, pad_type = var_1408_pad_type_0, strides = var_1408_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1393_cast_fp16)[name = string("op_1408_cast_fp16")]; tensor var_1412 = const()[name = string("op_1412"), val = tensor([0, 2, 1])]; tensor attn_output_9_cast_fp16 = transpose(perm = var_1412, x = var_1408_cast_fp16)[name = string("transpose_104")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_11_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_7_cast_fp16)[name = string("input_11_cast_fp16")]; tensor var_1431_axes_0 = const()[name = string("op_1431_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854812160)))]; fp16 var_1419_to_fp16 = const()[name = string("op_1419_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1431_cast_fp16 = layer_norm(axes = var_1431_axes_0, epsilon = var_1419_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("op_1431_cast_fp16")]; tensor var_1445 = const()[name = string("op_1445"), val = tensor([0, 2, 1])]; tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; tensor var_1446 = transpose(perm = var_1445, x = var_1431_cast_fp16)[name = string("transpose_103")]; tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_1446)[name = string("input_13")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor input_15 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("input_15")]; string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("b_1")]; tensor c_1 = silu(x = input_15)[name = string("c_1")]; tensor input_17 = mul(x = c_1, y = b_1)[name = string("input_17")]; string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; tensor e_1 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_17)[name = string("e_1")]; tensor var_1468_axes_0 = const()[name = string("op_1468_axes_0"), val = tensor([2])]; tensor var_1468 = squeeze(axes = var_1468_axes_0, x = e_1)[name = string("op_1468")]; tensor var_1469 = const()[name = string("op_1469"), val = tensor([0, 2, 1])]; tensor var_1470 = transpose(perm = var_1469, x = var_1468)[name = string("transpose_102")]; tensor hidden_states_7_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_1470)[name = string("hidden_states_7_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_7_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_19_cast_fp16 = sub(x = hidden_states_7_cast_fp16, y = mean_9_cast_fp16)[name = string("input_19_cast_fp16")]; tensor var_1488_axes_0 = const()[name = string("op_1488_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854817344)))]; fp16 var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1488_cast_fp16 = layer_norm(axes = var_1488_axes_0, epsilon = var_1476_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_19_cast_fp16)[name = string("op_1488_cast_fp16")]; tensor var_1494 = const()[name = string("op_1494"), val = tensor([0, 2, 1])]; tensor var_1497_axes_0 = const()[name = string("op_1497_axes_0"), val = tensor([2])]; tensor var_1495 = transpose(perm = var_1494, x = var_1488_cast_fp16)[name = string("transpose_101")]; tensor var_1497 = expand_dims(axes = var_1497_axes_0, x = var_1495)[name = string("op_1497")]; string var_1513_pad_type_0 = const()[name = string("op_1513_pad_type_0"), val = string("valid")]; tensor var_1513_strides_0 = const()[name = string("op_1513_strides_0"), val = tensor([1, 1])]; tensor var_1513_pad_0 = const()[name = string("op_1513_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1513_dilations_0 = const()[name = string("op_1513_dilations_0"), val = tensor([1, 1])]; int32 var_1513_groups_0 = const()[name = string("op_1513_groups_0"), val = int32(1)]; tensor var_1513 = conv(dilations = var_1513_dilations_0, groups = var_1513_groups_0, pad = var_1513_pad_0, pad_type = var_1513_pad_type_0, strides = var_1513_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_1497)[name = string("op_1513")]; tensor var_1518 = const()[name = string("op_1518"), val = tensor([1, 32, 1, 128])]; tensor var_1519 = reshape(shape = var_1518, x = var_1513)[name = string("op_1519")]; string var_1535_pad_type_0 = const()[name = string("op_1535_pad_type_0"), val = string("valid")]; tensor var_1535_strides_0 = const()[name = string("op_1535_strides_0"), val = tensor([1, 1])]; tensor var_1535_pad_0 = const()[name = string("op_1535_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1535_dilations_0 = const()[name = string("op_1535_dilations_0"), val = tensor([1, 1])]; int32 var_1535_groups_0 = const()[name = string("op_1535_groups_0"), val = int32(1)]; tensor var_1535 = conv(dilations = var_1535_dilations_0, groups = var_1535_groups_0, pad = var_1535_pad_0, pad_type = var_1535_pad_type_0, strides = var_1535_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_1497)[name = string("op_1535")]; tensor var_1540 = const()[name = string("op_1540"), val = tensor([1, 8, 1, 128])]; tensor var_1541 = reshape(shape = var_1540, x = var_1535)[name = string("op_1541")]; string var_1557_pad_type_0 = const()[name = string("op_1557_pad_type_0"), val = string("valid")]; tensor var_1557_strides_0 = const()[name = string("op_1557_strides_0"), val = tensor([1, 1])]; tensor var_1557_pad_0 = const()[name = string("op_1557_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1557_dilations_0 = const()[name = string("op_1557_dilations_0"), val = tensor([1, 1])]; int32 var_1557_groups_0 = const()[name = string("op_1557_groups_0"), val = int32(1)]; tensor var_1557 = conv(dilations = var_1557_dilations_0, groups = var_1557_groups_0, pad = var_1557_pad_0, pad_type = var_1557_pad_type_0, strides = var_1557_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_1497)[name = string("op_1557")]; tensor var_1562 = const()[name = string("op_1562"), val = tensor([1, 8, 1, 128])]; tensor var_1563 = reshape(shape = var_1562, x = var_1557)[name = string("op_1563")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = var_1519)[name = string("mean_11")]; tensor input_23 = sub(x = var_1519, y = mean_11)[name = string("input_23")]; tensor var_1584_axes_0 = const()[name = string("op_1584_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854822528)))]; fp16 var_1572_to_fp16 = const()[name = string("op_1572_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1584_cast_fp16 = layer_norm(axes = var_1584_axes_0, epsilon = var_1572_to_fp16, gamma = model_model_layers_1_self_attn_q_norm_weight_to_fp16, x = input_23)[name = string("op_1584_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = var_1541)[name = string("mean_13")]; tensor input_25 = sub(x = var_1541, y = mean_13)[name = string("input_25")]; tensor var_1602_axes_0 = const()[name = string("op_1602_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854822848)))]; fp16 var_1590_to_fp16 = const()[name = string("op_1590_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1602_cast_fp16 = layer_norm(axes = var_1602_axes_0, epsilon = var_1590_to_fp16, gamma = model_model_layers_1_self_attn_k_norm_weight_to_fp16, x = input_25)[name = string("op_1602_cast_fp16")]; tensor var_1605 = mul(x = var_1584_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1605")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_1584_cast_fp16)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_1584_cast_fp16)[name = string("x2_5")]; fp16 const_23_promoted = const()[name = string("const_23_promoted"), val = fp16(-0x1p+0)]; tensor var_1626 = mul(x = x2_5, y = const_23_promoted)[name = string("op_1626")]; int32 var_1628 = const()[name = string("op_1628"), val = int32(-1)]; bool var_1629_interleave_0 = const()[name = string("op_1629_interleave_0"), val = bool(false)]; tensor var_1629 = concat(axis = var_1628, interleave = var_1629_interleave_0, values = (var_1626, x1_5))[name = string("op_1629")]; tensor var_1630 = mul(x = var_1629, y = sin_1_cast_fp16)[name = string("op_1630")]; tensor query_states_5 = add(x = var_1605, y = var_1630)[name = string("query_states_5")]; tensor var_1633 = mul(x = var_1602_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1633")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_1602_cast_fp16)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_1602_cast_fp16)[name = string("x2_7")]; fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; tensor var_1654 = mul(x = x2_7, y = const_26_promoted)[name = string("op_1654")]; int32 var_1656 = const()[name = string("op_1656"), val = int32(-1)]; bool var_1657_interleave_0 = const()[name = string("op_1657_interleave_0"), val = bool(false)]; tensor var_1657 = concat(axis = var_1656, interleave = var_1657_interleave_0, values = (var_1654, x1_7))[name = string("op_1657")]; tensor var_1658 = mul(x = var_1657, y = sin_1_cast_fp16)[name = string("op_1658")]; tensor key_states_5 = add(x = var_1633, y = var_1658)[name = string("key_states_5")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_1195, concat_11_values3_0))[name = string("concat_11")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = key_states_5, x = coreml_update_state_37)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; tensor coreml_update_state_38 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([37])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([38])]; int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_1195, concat_15_values3_0))[name = string("concat_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_1563, x = coreml_update_state_38)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; tensor coreml_update_state_39 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; tensor var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_1713_end_0 = const()[name = string("op_1713_end_0"), val = tensor([2, 8, 1024, 128])]; tensor var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = var_1713_end_0, end_mask = var_1713_end_mask_0, x = coreml_update_state_39)[name = string("op_1713_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_1713_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_1720_begin_0 = const()[name = string("op_1720_begin_0"), val = tensor([37, 0, 0, 0])]; tensor var_1720_end_0 = const()[name = string("op_1720_end_0"), val = tensor([38, 8, 1024, 128])]; tensor var_1720_end_mask_0 = const()[name = string("op_1720_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1720_cast_fp16 = slice_by_index(begin = var_1720_begin_0, end = var_1720_end_0, end_mask = var_1720_end_mask_0, x = coreml_update_state_39)[name = string("op_1720_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_1720_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_27_axes_0 = const()[name = string("x_27_axes_0"), val = tensor([1])]; tensor x_27_cast_fp16 = expand_dims(axes = x_27_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_1757 = const()[name = string("op_1757"), val = tensor([1, 4, 1, 1])]; tensor x_29_cast_fp16 = tile(reps = var_1757, x = x_27_cast_fp16)[name = string("x_29_cast_fp16")]; tensor var_1769 = const()[name = string("op_1769"), val = tensor([1, -1, 1024, 128])]; tensor key_states_7_cast_fp16 = reshape(shape = var_1769, x = x_29_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_33_axes_0 = const()[name = string("x_33_axes_0"), val = tensor([1])]; tensor x_33_cast_fp16 = expand_dims(axes = x_33_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_1777 = const()[name = string("op_1777"), val = tensor([1, 4, 1, 1])]; tensor x_35_cast_fp16 = tile(reps = var_1777, x = x_33_cast_fp16)[name = string("x_35_cast_fp16")]; tensor var_1789 = const()[name = string("op_1789"), val = tensor([1, -1, 1024, 128])]; tensor value_states_9_cast_fp16 = reshape(shape = var_1789, x = x_35_cast_fp16)[name = string("value_states_9_cast_fp16")]; bool var_1804_transpose_x_1 = const()[name = string("op_1804_transpose_x_1"), val = bool(false)]; bool var_1804_transpose_y_1 = const()[name = string("op_1804_transpose_y_1"), val = bool(true)]; tensor var_1804 = matmul(transpose_x = var_1804_transpose_x_1, transpose_y = var_1804_transpose_y_1, x = query_states_5, y = key_states_7_cast_fp16)[name = string("op_1804")]; fp16 var_1805_to_fp16 = const()[name = string("op_1805_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_7_cast_fp16 = mul(x = var_1804, y = var_1805_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("attn_weights_9_cast_fp16")]; int32 var_1840 = const()[name = string("op_1840"), val = int32(-1)]; tensor attn_weights_11_cast_fp16 = softmax(axis = var_1840, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; bool attn_output_11_transpose_x_0 = const()[name = string("attn_output_11_transpose_x_0"), val = bool(false)]; bool attn_output_11_transpose_y_0 = const()[name = string("attn_output_11_transpose_y_0"), val = bool(false)]; tensor attn_output_11_cast_fp16 = matmul(transpose_x = attn_output_11_transpose_x_0, transpose_y = attn_output_11_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_9_cast_fp16)[name = string("attn_output_11_cast_fp16")]; tensor var_1851_perm_0 = const()[name = string("op_1851_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1855 = const()[name = string("op_1855"), val = tensor([1, 1, 4096])]; tensor var_1851_cast_fp16 = transpose(perm = var_1851_perm_0, x = attn_output_11_cast_fp16)[name = string("transpose_100")]; tensor attn_output_15_cast_fp16 = reshape(shape = var_1855, x = var_1851_cast_fp16)[name = string("attn_output_15_cast_fp16")]; tensor var_1860 = const()[name = string("op_1860"), val = tensor([0, 2, 1])]; string var_1876_pad_type_0 = const()[name = string("op_1876_pad_type_0"), val = string("valid")]; int32 var_1876_groups_0 = const()[name = string("op_1876_groups_0"), val = int32(1)]; tensor var_1876_strides_0 = const()[name = string("op_1876_strides_0"), val = tensor([1])]; tensor var_1876_pad_0 = const()[name = string("op_1876_pad_0"), val = tensor([0, 0])]; tensor var_1876_dilations_0 = const()[name = string("op_1876_dilations_0"), val = tensor([1])]; tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854823168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860066112))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1861_cast_fp16 = transpose(perm = var_1860, x = attn_output_15_cast_fp16)[name = string("transpose_99")]; tensor var_1876_cast_fp16 = conv(dilations = var_1876_dilations_0, groups = var_1876_groups_0, pad = var_1876_pad_0, pad_type = var_1876_pad_type_0, strides = var_1876_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1861_cast_fp16)[name = string("op_1876_cast_fp16")]; tensor var_1880 = const()[name = string("op_1880"), val = tensor([0, 2, 1])]; tensor attn_output_19_cast_fp16 = transpose(perm = var_1880, x = var_1876_cast_fp16)[name = string("transpose_98")]; tensor hidden_states_11_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_11_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_11_cast_fp16, y = mean_15_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_1899_axes_0 = const()[name = string("op_1899_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860076416)))]; fp16 var_1887_to_fp16 = const()[name = string("op_1887_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1899_cast_fp16 = layer_norm(axes = var_1899_axes_0, epsilon = var_1887_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_1899_cast_fp16")]; tensor var_1913 = const()[name = string("op_1913"), val = tensor([0, 2, 1])]; tensor input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor([2])]; tensor var_1914 = transpose(perm = var_1913, x = var_1899_cast_fp16)[name = string("transpose_97")]; tensor input_31 = expand_dims(axes = input_31_axes_0, x = var_1914)[name = string("input_31")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor input_33 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_31)[name = string("input_33")]; string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_31)[name = string("b_3")]; tensor c_3 = silu(x = input_33)[name = string("c_3")]; tensor input_35 = mul(x = c_3, y = b_3)[name = string("input_35")]; string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; tensor e_3 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_35)[name = string("e_3")]; tensor var_1936_axes_0 = const()[name = string("op_1936_axes_0"), val = tensor([2])]; tensor var_1936 = squeeze(axes = var_1936_axes_0, x = e_3)[name = string("op_1936")]; tensor var_1937 = const()[name = string("op_1937"), val = tensor([0, 2, 1])]; tensor var_1938 = transpose(perm = var_1937, x = var_1936)[name = string("transpose_96")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = var_1938)[name = string("hidden_states_13_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_37_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_17_cast_fp16)[name = string("input_37_cast_fp16")]; tensor var_1956_axes_0 = const()[name = string("op_1956_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860081600)))]; fp16 var_1944_to_fp16 = const()[name = string("op_1944_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1956_cast_fp16 = layer_norm(axes = var_1956_axes_0, epsilon = var_1944_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_37_cast_fp16)[name = string("op_1956_cast_fp16")]; tensor var_1962 = const()[name = string("op_1962"), val = tensor([0, 2, 1])]; tensor var_1965_axes_0 = const()[name = string("op_1965_axes_0"), val = tensor([2])]; tensor var_1963 = transpose(perm = var_1962, x = var_1956_cast_fp16)[name = string("transpose_95")]; tensor var_1965 = expand_dims(axes = var_1965_axes_0, x = var_1963)[name = string("op_1965")]; string var_1981_pad_type_0 = const()[name = string("op_1981_pad_type_0"), val = string("valid")]; tensor var_1981_strides_0 = const()[name = string("op_1981_strides_0"), val = tensor([1, 1])]; tensor var_1981_pad_0 = const()[name = string("op_1981_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1981_dilations_0 = const()[name = string("op_1981_dilations_0"), val = tensor([1, 1])]; int32 var_1981_groups_0 = const()[name = string("op_1981_groups_0"), val = int32(1)]; tensor var_1981 = conv(dilations = var_1981_dilations_0, groups = var_1981_groups_0, pad = var_1981_pad_0, pad_type = var_1981_pad_type_0, strides = var_1981_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_1965)[name = string("op_1981")]; tensor var_1986 = const()[name = string("op_1986"), val = tensor([1, 32, 1, 128])]; tensor var_1987 = reshape(shape = var_1986, x = var_1981)[name = string("op_1987")]; string var_2003_pad_type_0 = const()[name = string("op_2003_pad_type_0"), val = string("valid")]; tensor var_2003_strides_0 = const()[name = string("op_2003_strides_0"), val = tensor([1, 1])]; tensor var_2003_pad_0 = const()[name = string("op_2003_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2003_dilations_0 = const()[name = string("op_2003_dilations_0"), val = tensor([1, 1])]; int32 var_2003_groups_0 = const()[name = string("op_2003_groups_0"), val = int32(1)]; tensor var_2003 = conv(dilations = var_2003_dilations_0, groups = var_2003_groups_0, pad = var_2003_pad_0, pad_type = var_2003_pad_type_0, strides = var_2003_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_1965)[name = string("op_2003")]; tensor var_2008 = const()[name = string("op_2008"), val = tensor([1, 8, 1, 128])]; tensor var_2009 = reshape(shape = var_2008, x = var_2003)[name = string("op_2009")]; string var_2025_pad_type_0 = const()[name = string("op_2025_pad_type_0"), val = string("valid")]; tensor var_2025_strides_0 = const()[name = string("op_2025_strides_0"), val = tensor([1, 1])]; tensor var_2025_pad_0 = const()[name = string("op_2025_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2025_dilations_0 = const()[name = string("op_2025_dilations_0"), val = tensor([1, 1])]; int32 var_2025_groups_0 = const()[name = string("op_2025_groups_0"), val = int32(1)]; tensor var_2025 = conv(dilations = var_2025_dilations_0, groups = var_2025_groups_0, pad = var_2025_pad_0, pad_type = var_2025_pad_type_0, strides = var_2025_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_1965)[name = string("op_2025")]; tensor var_2030 = const()[name = string("op_2030"), val = tensor([1, 8, 1, 128])]; tensor var_2031 = reshape(shape = var_2030, x = var_2025)[name = string("op_2031")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = var_1987)[name = string("mean_19")]; tensor input_41 = sub(x = var_1987, y = mean_19)[name = string("input_41")]; tensor var_2052_axes_0 = const()[name = string("op_2052_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860086784)))]; fp16 var_2040_to_fp16 = const()[name = string("op_2040_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2052_cast_fp16 = layer_norm(axes = var_2052_axes_0, epsilon = var_2040_to_fp16, gamma = model_model_layers_2_self_attn_q_norm_weight_to_fp16, x = input_41)[name = string("op_2052_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = var_2009)[name = string("mean_21")]; tensor input_43 = sub(x = var_2009, y = mean_21)[name = string("input_43")]; tensor var_2070_axes_0 = const()[name = string("op_2070_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860087104)))]; fp16 var_2058_to_fp16 = const()[name = string("op_2058_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2070_cast_fp16 = layer_norm(axes = var_2070_axes_0, epsilon = var_2058_to_fp16, gamma = model_model_layers_2_self_attn_k_norm_weight_to_fp16, x = input_43)[name = string("op_2070_cast_fp16")]; tensor var_2073 = mul(x = var_2052_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2073")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_2052_cast_fp16)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_2052_cast_fp16)[name = string("x2_9")]; fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; tensor var_2094 = mul(x = x2_9, y = const_41_promoted)[name = string("op_2094")]; int32 var_2096 = const()[name = string("op_2096"), val = int32(-1)]; bool var_2097_interleave_0 = const()[name = string("op_2097_interleave_0"), val = bool(false)]; tensor var_2097 = concat(axis = var_2096, interleave = var_2097_interleave_0, values = (var_2094, x1_9))[name = string("op_2097")]; tensor var_2098 = mul(x = var_2097, y = sin_1_cast_fp16)[name = string("op_2098")]; tensor query_states_9 = add(x = var_2073, y = var_2098)[name = string("query_states_9")]; tensor var_2101 = mul(x = var_2070_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2101")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_2070_cast_fp16)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_2070_cast_fp16)[name = string("x2_11")]; fp16 const_44_promoted = const()[name = string("const_44_promoted"), val = fp16(-0x1p+0)]; tensor var_2122 = mul(x = x2_11, y = const_44_promoted)[name = string("op_2122")]; int32 var_2124 = const()[name = string("op_2124"), val = int32(-1)]; bool var_2125_interleave_0 = const()[name = string("op_2125_interleave_0"), val = bool(false)]; tensor var_2125 = concat(axis = var_2124, interleave = var_2125_interleave_0, values = (var_2122, x1_11))[name = string("op_2125")]; tensor var_2126 = mul(x = var_2125, y = sin_1_cast_fp16)[name = string("op_2126")]; tensor key_states_9 = add(x = var_2101, y = var_2126)[name = string("key_states_9")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_1195, concat_19_values3_0))[name = string("concat_19")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = key_states_9, x = coreml_update_state_39)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; tensor coreml_update_state_40 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([38])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([39])]; int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_1195, concat_23_values3_0))[name = string("concat_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_2031, x = coreml_update_state_40)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; tensor coreml_update_state_41 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; tensor var_2181_begin_0 = const()[name = string("op_2181_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_2181_end_0 = const()[name = string("op_2181_end_0"), val = tensor([3, 8, 1024, 128])]; tensor var_2181_end_mask_0 = const()[name = string("op_2181_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2181_cast_fp16 = slice_by_index(begin = var_2181_begin_0, end = var_2181_end_0, end_mask = var_2181_end_mask_0, x = coreml_update_state_41)[name = string("op_2181_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_2181_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_2188_begin_0 = const()[name = string("op_2188_begin_0"), val = tensor([38, 0, 0, 0])]; tensor var_2188_end_0 = const()[name = string("op_2188_end_0"), val = tensor([39, 8, 1024, 128])]; tensor var_2188_end_mask_0 = const()[name = string("op_2188_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = coreml_update_state_41)[name = string("op_2188_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_2188_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_47_axes_0 = const()[name = string("x_47_axes_0"), val = tensor([1])]; tensor x_47_cast_fp16 = expand_dims(axes = x_47_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_2225 = const()[name = string("op_2225"), val = tensor([1, 4, 1, 1])]; tensor x_49_cast_fp16 = tile(reps = var_2225, x = x_47_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_2237 = const()[name = string("op_2237"), val = tensor([1, -1, 1024, 128])]; tensor key_states_11_cast_fp16 = reshape(shape = var_2237, x = x_49_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor x_53_axes_0 = const()[name = string("x_53_axes_0"), val = tensor([1])]; tensor x_53_cast_fp16 = expand_dims(axes = x_53_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_53_cast_fp16")]; tensor var_2245 = const()[name = string("op_2245"), val = tensor([1, 4, 1, 1])]; tensor x_55_cast_fp16 = tile(reps = var_2245, x = x_53_cast_fp16)[name = string("x_55_cast_fp16")]; tensor var_2257 = const()[name = string("op_2257"), val = tensor([1, -1, 1024, 128])]; tensor value_states_15_cast_fp16 = reshape(shape = var_2257, x = x_55_cast_fp16)[name = string("value_states_15_cast_fp16")]; bool var_2272_transpose_x_1 = const()[name = string("op_2272_transpose_x_1"), val = bool(false)]; bool var_2272_transpose_y_1 = const()[name = string("op_2272_transpose_y_1"), val = bool(true)]; tensor var_2272 = matmul(transpose_x = var_2272_transpose_x_1, transpose_y = var_2272_transpose_y_1, x = query_states_9, y = key_states_11_cast_fp16)[name = string("op_2272")]; fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_2272, y = var_2273_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_2308 = const()[name = string("op_2308"), val = int32(-1)]; tensor attn_weights_17_cast_fp16 = softmax(axis = var_2308, x = attn_weights_15_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = attn_weights_17_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_21_cast_fp16")]; tensor var_2319_perm_0 = const()[name = string("op_2319_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2323 = const()[name = string("op_2323"), val = tensor([1, 1, 4096])]; tensor var_2319_cast_fp16 = transpose(perm = var_2319_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_94")]; tensor attn_output_25_cast_fp16 = reshape(shape = var_2323, x = var_2319_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_2328 = const()[name = string("op_2328"), val = tensor([0, 2, 1])]; string var_2344_pad_type_0 = const()[name = string("op_2344_pad_type_0"), val = string("valid")]; int32 var_2344_groups_0 = const()[name = string("op_2344_groups_0"), val = int32(1)]; tensor var_2344_strides_0 = const()[name = string("op_2344_strides_0"), val = tensor([1])]; tensor var_2344_pad_0 = const()[name = string("op_2344_pad_0"), val = tensor([0, 0])]; tensor var_2344_dilations_0 = const()[name = string("op_2344_dilations_0"), val = tensor([1])]; tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860087424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865330368))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2329_cast_fp16 = transpose(perm = var_2328, x = attn_output_25_cast_fp16)[name = string("transpose_93")]; tensor var_2344_cast_fp16 = conv(dilations = var_2344_dilations_0, groups = var_2344_groups_0, pad = var_2344_pad_0, pad_type = var_2344_pad_type_0, strides = var_2344_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2329_cast_fp16)[name = string("op_2344_cast_fp16")]; tensor var_2348 = const()[name = string("op_2348"), val = tensor([0, 2, 1])]; tensor attn_output_29_cast_fp16 = transpose(perm = var_2348, x = var_2344_cast_fp16)[name = string("transpose_92")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_47_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_23_cast_fp16)[name = string("input_47_cast_fp16")]; tensor var_2367_axes_0 = const()[name = string("op_2367_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865340672)))]; fp16 var_2355_to_fp16 = const()[name = string("op_2355_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2367_cast_fp16 = layer_norm(axes = var_2367_axes_0, epsilon = var_2355_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("op_2367_cast_fp16")]; tensor var_2381 = const()[name = string("op_2381"), val = tensor([0, 2, 1])]; tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; tensor var_2382 = transpose(perm = var_2381, x = var_2367_cast_fp16)[name = string("transpose_91")]; tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_2382)[name = string("input_49")]; string input_51_pad_type_0 = const()[name = string("input_51_pad_type_0"), val = string("valid")]; tensor input_51_strides_0 = const()[name = string("input_51_strides_0"), val = tensor([1, 1])]; tensor input_51_pad_0 = const()[name = string("input_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_51_dilations_0 = const()[name = string("input_51_dilations_0"), val = tensor([1, 1])]; int32 input_51_groups_0 = const()[name = string("input_51_groups_0"), val = int32(1)]; tensor input_51 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_49)[name = string("input_51")]; string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_49)[name = string("b_5")]; tensor c_5 = silu(x = input_51)[name = string("c_5")]; tensor input_53 = mul(x = c_5, y = b_5)[name = string("input_53")]; string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; tensor e_5 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_53)[name = string("e_5")]; tensor var_2404_axes_0 = const()[name = string("op_2404_axes_0"), val = tensor([2])]; tensor var_2404 = squeeze(axes = var_2404_axes_0, x = e_5)[name = string("op_2404")]; tensor var_2405 = const()[name = string("op_2405"), val = tensor([0, 2, 1])]; tensor var_2406 = transpose(perm = var_2405, x = var_2404)[name = string("transpose_90")]; tensor hidden_states_19_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = var_2406)[name = string("hidden_states_19_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_19_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_55_cast_fp16 = sub(x = hidden_states_19_cast_fp16, y = mean_25_cast_fp16)[name = string("input_55_cast_fp16")]; tensor var_2424_axes_0 = const()[name = string("op_2424_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865345856)))]; fp16 var_2412_to_fp16 = const()[name = string("op_2412_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2424_cast_fp16 = layer_norm(axes = var_2424_axes_0, epsilon = var_2412_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_55_cast_fp16)[name = string("op_2424_cast_fp16")]; tensor var_2430 = const()[name = string("op_2430"), val = tensor([0, 2, 1])]; tensor var_2433_axes_0 = const()[name = string("op_2433_axes_0"), val = tensor([2])]; tensor var_2431 = transpose(perm = var_2430, x = var_2424_cast_fp16)[name = string("transpose_89")]; tensor var_2433 = expand_dims(axes = var_2433_axes_0, x = var_2431)[name = string("op_2433")]; string var_2449_pad_type_0 = const()[name = string("op_2449_pad_type_0"), val = string("valid")]; tensor var_2449_strides_0 = const()[name = string("op_2449_strides_0"), val = tensor([1, 1])]; tensor var_2449_pad_0 = const()[name = string("op_2449_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2449_dilations_0 = const()[name = string("op_2449_dilations_0"), val = tensor([1, 1])]; int32 var_2449_groups_0 = const()[name = string("op_2449_groups_0"), val = int32(1)]; tensor var_2449 = conv(dilations = var_2449_dilations_0, groups = var_2449_groups_0, pad = var_2449_pad_0, pad_type = var_2449_pad_type_0, strides = var_2449_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_2433)[name = string("op_2449")]; tensor var_2454 = const()[name = string("op_2454"), val = tensor([1, 32, 1, 128])]; tensor var_2455 = reshape(shape = var_2454, x = var_2449)[name = string("op_2455")]; string var_2471_pad_type_0 = const()[name = string("op_2471_pad_type_0"), val = string("valid")]; tensor var_2471_strides_0 = const()[name = string("op_2471_strides_0"), val = tensor([1, 1])]; tensor var_2471_pad_0 = const()[name = string("op_2471_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2471_dilations_0 = const()[name = string("op_2471_dilations_0"), val = tensor([1, 1])]; int32 var_2471_groups_0 = const()[name = string("op_2471_groups_0"), val = int32(1)]; tensor var_2471 = conv(dilations = var_2471_dilations_0, groups = var_2471_groups_0, pad = var_2471_pad_0, pad_type = var_2471_pad_type_0, strides = var_2471_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_2433)[name = string("op_2471")]; tensor var_2476 = const()[name = string("op_2476"), val = tensor([1, 8, 1, 128])]; tensor var_2477 = reshape(shape = var_2476, x = var_2471)[name = string("op_2477")]; string var_2493_pad_type_0 = const()[name = string("op_2493_pad_type_0"), val = string("valid")]; tensor var_2493_strides_0 = const()[name = string("op_2493_strides_0"), val = tensor([1, 1])]; tensor var_2493_pad_0 = const()[name = string("op_2493_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2493_dilations_0 = const()[name = string("op_2493_dilations_0"), val = tensor([1, 1])]; int32 var_2493_groups_0 = const()[name = string("op_2493_groups_0"), val = int32(1)]; tensor var_2493 = conv(dilations = var_2493_dilations_0, groups = var_2493_groups_0, pad = var_2493_pad_0, pad_type = var_2493_pad_type_0, strides = var_2493_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_2433)[name = string("op_2493")]; tensor var_2498 = const()[name = string("op_2498"), val = tensor([1, 8, 1, 128])]; tensor var_2499 = reshape(shape = var_2498, x = var_2493)[name = string("op_2499")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = var_2455)[name = string("mean_27")]; tensor input_59 = sub(x = var_2455, y = mean_27)[name = string("input_59")]; tensor var_2520_axes_0 = const()[name = string("op_2520_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351040)))]; fp16 var_2508_to_fp16 = const()[name = string("op_2508_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2520_cast_fp16 = layer_norm(axes = var_2520_axes_0, epsilon = var_2508_to_fp16, gamma = model_model_layers_3_self_attn_q_norm_weight_to_fp16, x = input_59)[name = string("op_2520_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor mean_29 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = var_2477)[name = string("mean_29")]; tensor input_61 = sub(x = var_2477, y = mean_29)[name = string("input_61")]; tensor var_2538_axes_0 = const()[name = string("op_2538_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351360)))]; fp16 var_2526_to_fp16 = const()[name = string("op_2526_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2538_cast_fp16 = layer_norm(axes = var_2538_axes_0, epsilon = var_2526_to_fp16, gamma = model_model_layers_3_self_attn_k_norm_weight_to_fp16, x = input_61)[name = string("op_2538_cast_fp16")]; tensor var_2541 = mul(x = var_2520_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2541")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_2520_cast_fp16)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_2520_cast_fp16)[name = string("x2_13")]; fp16 const_59_promoted = const()[name = string("const_59_promoted"), val = fp16(-0x1p+0)]; tensor var_2562 = mul(x = x2_13, y = const_59_promoted)[name = string("op_2562")]; int32 var_2564 = const()[name = string("op_2564"), val = int32(-1)]; bool var_2565_interleave_0 = const()[name = string("op_2565_interleave_0"), val = bool(false)]; tensor var_2565 = concat(axis = var_2564, interleave = var_2565_interleave_0, values = (var_2562, x1_13))[name = string("op_2565")]; tensor var_2566 = mul(x = var_2565, y = sin_1_cast_fp16)[name = string("op_2566")]; tensor query_states_13 = add(x = var_2541, y = var_2566)[name = string("query_states_13")]; tensor var_2569 = mul(x = var_2538_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2569")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_2538_cast_fp16)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_2538_cast_fp16)[name = string("x2_15")]; fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; tensor var_2590 = mul(x = x2_15, y = const_62_promoted)[name = string("op_2590")]; int32 var_2592 = const()[name = string("op_2592"), val = int32(-1)]; bool var_2593_interleave_0 = const()[name = string("op_2593_interleave_0"), val = bool(false)]; tensor var_2593 = concat(axis = var_2592, interleave = var_2593_interleave_0, values = (var_2590, x1_15))[name = string("op_2593")]; tensor var_2594 = mul(x = var_2593, y = sin_1_cast_fp16)[name = string("op_2594")]; tensor key_states_13 = add(x = var_2569, y = var_2594)[name = string("key_states_13")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_1195, concat_27_values3_0))[name = string("concat_27")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = key_states_13, x = coreml_update_state_41)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; tensor coreml_update_state_42 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([39])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([40])]; int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_1195, concat_31_values3_0))[name = string("concat_31")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_2499, x = coreml_update_state_42)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; tensor coreml_update_state_43 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; tensor var_2649_begin_0 = const()[name = string("op_2649_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_2649_end_0 = const()[name = string("op_2649_end_0"), val = tensor([4, 8, 1024, 128])]; tensor var_2649_end_mask_0 = const()[name = string("op_2649_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2649_cast_fp16 = slice_by_index(begin = var_2649_begin_0, end = var_2649_end_0, end_mask = var_2649_end_mask_0, x = coreml_update_state_43)[name = string("op_2649_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_2649_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_2656_begin_0 = const()[name = string("op_2656_begin_0"), val = tensor([39, 0, 0, 0])]; tensor var_2656_end_0 = const()[name = string("op_2656_end_0"), val = tensor([40, 8, 1024, 128])]; tensor var_2656_end_mask_0 = const()[name = string("op_2656_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2656_cast_fp16 = slice_by_index(begin = var_2656_begin_0, end = var_2656_end_0, end_mask = var_2656_end_mask_0, x = coreml_update_state_43)[name = string("op_2656_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_2656_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_2693 = const()[name = string("op_2693"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_2693, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_2705 = const()[name = string("op_2705"), val = tensor([1, -1, 1024, 128])]; tensor key_states_15_cast_fp16 = reshape(shape = var_2705, x = x_69_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_2713 = const()[name = string("op_2713"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_2713, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; tensor var_2725 = const()[name = string("op_2725"), val = tensor([1, -1, 1024, 128])]; tensor value_states_21_cast_fp16 = reshape(shape = var_2725, x = x_75_cast_fp16)[name = string("value_states_21_cast_fp16")]; bool var_2740_transpose_x_1 = const()[name = string("op_2740_transpose_x_1"), val = bool(false)]; bool var_2740_transpose_y_1 = const()[name = string("op_2740_transpose_y_1"), val = bool(true)]; tensor var_2740 = matmul(transpose_x = var_2740_transpose_x_1, transpose_y = var_2740_transpose_y_1, x = query_states_13, y = key_states_15_cast_fp16)[name = string("op_2740")]; fp16 var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_19_cast_fp16 = mul(x = var_2740, y = var_2741_to_fp16)[name = string("attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = causal_mask)[name = string("attn_weights_21_cast_fp16")]; int32 var_2776 = const()[name = string("op_2776"), val = int32(-1)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_2776, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_21_cast_fp16)[name = string("attn_output_31_cast_fp16")]; tensor var_2787_perm_0 = const()[name = string("op_2787_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2791 = const()[name = string("op_2791"), val = tensor([1, 1, 4096])]; tensor var_2787_cast_fp16 = transpose(perm = var_2787_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_88")]; tensor attn_output_35_cast_fp16 = reshape(shape = var_2791, x = var_2787_cast_fp16)[name = string("attn_output_35_cast_fp16")]; tensor var_2796 = const()[name = string("op_2796"), val = tensor([0, 2, 1])]; string var_2812_pad_type_0 = const()[name = string("op_2812_pad_type_0"), val = string("valid")]; int32 var_2812_groups_0 = const()[name = string("op_2812_groups_0"), val = int32(1)]; tensor var_2812_strides_0 = const()[name = string("op_2812_strides_0"), val = tensor([1])]; tensor var_2812_pad_0 = const()[name = string("op_2812_pad_0"), val = tensor([0, 0])]; tensor var_2812_dilations_0 = const()[name = string("op_2812_dilations_0"), val = tensor([1])]; tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870594624))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2797_cast_fp16 = transpose(perm = var_2796, x = attn_output_35_cast_fp16)[name = string("transpose_87")]; tensor var_2812_cast_fp16 = conv(dilations = var_2812_dilations_0, groups = var_2812_groups_0, pad = var_2812_pad_0, pad_type = var_2812_pad_type_0, strides = var_2812_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_2797_cast_fp16)[name = string("op_2812_cast_fp16")]; tensor var_2816 = const()[name = string("op_2816"), val = tensor([0, 2, 1])]; tensor attn_output_39_cast_fp16 = transpose(perm = var_2816, x = var_2812_cast_fp16)[name = string("transpose_86")]; tensor hidden_states_23_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_23_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_65_cast_fp16 = sub(x = hidden_states_23_cast_fp16, y = mean_31_cast_fp16)[name = string("input_65_cast_fp16")]; tensor var_2835_axes_0 = const()[name = string("op_2835_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870604928)))]; fp16 var_2823_to_fp16 = const()[name = string("op_2823_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2835_cast_fp16 = layer_norm(axes = var_2835_axes_0, epsilon = var_2823_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_65_cast_fp16)[name = string("op_2835_cast_fp16")]; tensor var_2849 = const()[name = string("op_2849"), val = tensor([0, 2, 1])]; tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; tensor var_2850 = transpose(perm = var_2849, x = var_2835_cast_fp16)[name = string("transpose_85")]; tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_2850)[name = string("input_67")]; string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; tensor input_69 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_67)[name = string("input_69")]; string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_67)[name = string("b_7")]; tensor c_7 = silu(x = input_69)[name = string("c_7")]; tensor input_71 = mul(x = c_7, y = b_7)[name = string("input_71")]; string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; tensor e_7 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_71)[name = string("e_7")]; tensor var_2872_axes_0 = const()[name = string("op_2872_axes_0"), val = tensor([2])]; tensor var_2872 = squeeze(axes = var_2872_axes_0, x = e_7)[name = string("op_2872")]; tensor var_2873 = const()[name = string("op_2873"), val = tensor([0, 2, 1])]; tensor var_2874 = transpose(perm = var_2873, x = var_2872)[name = string("transpose_84")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_23_cast_fp16, y = var_2874)[name = string("hidden_states_25_cast_fp16")]; tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_33_cast_fp16")]; tensor input_73_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_33_cast_fp16)[name = string("input_73_cast_fp16")]; tensor var_2892_axes_0 = const()[name = string("op_2892_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870610112)))]; fp16 var_2880_to_fp16 = const()[name = string("op_2880_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2892_cast_fp16 = layer_norm(axes = var_2892_axes_0, epsilon = var_2880_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_73_cast_fp16)[name = string("op_2892_cast_fp16")]; tensor var_2898 = const()[name = string("op_2898"), val = tensor([0, 2, 1])]; tensor var_2901_axes_0 = const()[name = string("op_2901_axes_0"), val = tensor([2])]; tensor var_2899 = transpose(perm = var_2898, x = var_2892_cast_fp16)[name = string("transpose_83")]; tensor var_2901 = expand_dims(axes = var_2901_axes_0, x = var_2899)[name = string("op_2901")]; string var_2917_pad_type_0 = const()[name = string("op_2917_pad_type_0"), val = string("valid")]; tensor var_2917_strides_0 = const()[name = string("op_2917_strides_0"), val = tensor([1, 1])]; tensor var_2917_pad_0 = const()[name = string("op_2917_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2917_dilations_0 = const()[name = string("op_2917_dilations_0"), val = tensor([1, 1])]; int32 var_2917_groups_0 = const()[name = string("op_2917_groups_0"), val = int32(1)]; tensor var_2917 = conv(dilations = var_2917_dilations_0, groups = var_2917_groups_0, pad = var_2917_pad_0, pad_type = var_2917_pad_type_0, strides = var_2917_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_2901)[name = string("op_2917")]; tensor var_2922 = const()[name = string("op_2922"), val = tensor([1, 32, 1, 128])]; tensor var_2923 = reshape(shape = var_2922, x = var_2917)[name = string("op_2923")]; string var_2939_pad_type_0 = const()[name = string("op_2939_pad_type_0"), val = string("valid")]; tensor var_2939_strides_0 = const()[name = string("op_2939_strides_0"), val = tensor([1, 1])]; tensor var_2939_pad_0 = const()[name = string("op_2939_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2939_dilations_0 = const()[name = string("op_2939_dilations_0"), val = tensor([1, 1])]; int32 var_2939_groups_0 = const()[name = string("op_2939_groups_0"), val = int32(1)]; tensor var_2939 = conv(dilations = var_2939_dilations_0, groups = var_2939_groups_0, pad = var_2939_pad_0, pad_type = var_2939_pad_type_0, strides = var_2939_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_2901)[name = string("op_2939")]; tensor var_2944 = const()[name = string("op_2944"), val = tensor([1, 8, 1, 128])]; tensor var_2945 = reshape(shape = var_2944, x = var_2939)[name = string("op_2945")]; string var_2961_pad_type_0 = const()[name = string("op_2961_pad_type_0"), val = string("valid")]; tensor var_2961_strides_0 = const()[name = string("op_2961_strides_0"), val = tensor([1, 1])]; tensor var_2961_pad_0 = const()[name = string("op_2961_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2961_dilations_0 = const()[name = string("op_2961_dilations_0"), val = tensor([1, 1])]; int32 var_2961_groups_0 = const()[name = string("op_2961_groups_0"), val = int32(1)]; tensor var_2961 = conv(dilations = var_2961_dilations_0, groups = var_2961_groups_0, pad = var_2961_pad_0, pad_type = var_2961_pad_type_0, strides = var_2961_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_2901)[name = string("op_2961")]; tensor var_2966 = const()[name = string("op_2966"), val = tensor([1, 8, 1, 128])]; tensor var_2967 = reshape(shape = var_2966, x = var_2961)[name = string("op_2967")]; tensor mean_35_axes_0 = const()[name = string("mean_35_axes_0"), val = tensor([-1])]; bool mean_35_keep_dims_0 = const()[name = string("mean_35_keep_dims_0"), val = bool(true)]; tensor mean_35 = reduce_mean(axes = mean_35_axes_0, keep_dims = mean_35_keep_dims_0, x = var_2923)[name = string("mean_35")]; tensor input_77 = sub(x = var_2923, y = mean_35)[name = string("input_77")]; tensor var_2988_axes_0 = const()[name = string("op_2988_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615296)))]; fp16 var_2976_to_fp16 = const()[name = string("op_2976_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2988_cast_fp16 = layer_norm(axes = var_2988_axes_0, epsilon = var_2976_to_fp16, gamma = model_model_layers_4_self_attn_q_norm_weight_to_fp16, x = input_77)[name = string("op_2988_cast_fp16")]; tensor mean_37_axes_0 = const()[name = string("mean_37_axes_0"), val = tensor([-1])]; bool mean_37_keep_dims_0 = const()[name = string("mean_37_keep_dims_0"), val = bool(true)]; tensor mean_37 = reduce_mean(axes = mean_37_axes_0, keep_dims = mean_37_keep_dims_0, x = var_2945)[name = string("mean_37")]; tensor input_79 = sub(x = var_2945, y = mean_37)[name = string("input_79")]; tensor var_3006_axes_0 = const()[name = string("op_3006_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615616)))]; fp16 var_2994_to_fp16 = const()[name = string("op_2994_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3006_cast_fp16 = layer_norm(axes = var_3006_axes_0, epsilon = var_2994_to_fp16, gamma = model_model_layers_4_self_attn_k_norm_weight_to_fp16, x = input_79)[name = string("op_3006_cast_fp16")]; tensor var_3009 = mul(x = var_2988_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3009")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_2988_cast_fp16)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_2988_cast_fp16)[name = string("x2_17")]; fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)]; tensor var_3030 = mul(x = x2_17, y = const_77_promoted)[name = string("op_3030")]; int32 var_3032 = const()[name = string("op_3032"), val = int32(-1)]; bool var_3033_interleave_0 = const()[name = string("op_3033_interleave_0"), val = bool(false)]; tensor var_3033 = concat(axis = var_3032, interleave = var_3033_interleave_0, values = (var_3030, x1_17))[name = string("op_3033")]; tensor var_3034 = mul(x = var_3033, y = sin_1_cast_fp16)[name = string("op_3034")]; tensor query_states_17 = add(x = var_3009, y = var_3034)[name = string("query_states_17")]; tensor var_3037 = mul(x = var_3006_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3037")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_3006_cast_fp16)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_3006_cast_fp16)[name = string("x2_19")]; fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; tensor var_3058 = mul(x = x2_19, y = const_80_promoted)[name = string("op_3058")]; int32 var_3060 = const()[name = string("op_3060"), val = int32(-1)]; bool var_3061_interleave_0 = const()[name = string("op_3061_interleave_0"), val = bool(false)]; tensor var_3061 = concat(axis = var_3060, interleave = var_3061_interleave_0, values = (var_3058, x1_19))[name = string("op_3061")]; tensor var_3062 = mul(x = var_3061, y = sin_1_cast_fp16)[name = string("op_3062")]; tensor key_states_17 = add(x = var_3037, y = var_3062)[name = string("key_states_17")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_1195, concat_35_values3_0))[name = string("concat_35")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = key_states_17, x = coreml_update_state_43)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; tensor coreml_update_state_44 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([40])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([41])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_1195, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_2967, x = coreml_update_state_44)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; tensor coreml_update_state_45 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; tensor var_3117_begin_0 = const()[name = string("op_3117_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_3117_end_0 = const()[name = string("op_3117_end_0"), val = tensor([5, 8, 1024, 128])]; tensor var_3117_end_mask_0 = const()[name = string("op_3117_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3117_cast_fp16 = slice_by_index(begin = var_3117_begin_0, end = var_3117_end_0, end_mask = var_3117_end_mask_0, x = coreml_update_state_45)[name = string("op_3117_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_3117_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_3124_begin_0 = const()[name = string("op_3124_begin_0"), val = tensor([40, 0, 0, 0])]; tensor var_3124_end_0 = const()[name = string("op_3124_end_0"), val = tensor([41, 8, 1024, 128])]; tensor var_3124_end_mask_0 = const()[name = string("op_3124_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3124_cast_fp16 = slice_by_index(begin = var_3124_begin_0, end = var_3124_end_0, end_mask = var_3124_end_mask_0, x = coreml_update_state_45)[name = string("op_3124_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_3124_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_87_axes_0 = const()[name = string("x_87_axes_0"), val = tensor([1])]; tensor x_87_cast_fp16 = expand_dims(axes = x_87_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_3161 = const()[name = string("op_3161"), val = tensor([1, 4, 1, 1])]; tensor x_89_cast_fp16 = tile(reps = var_3161, x = x_87_cast_fp16)[name = string("x_89_cast_fp16")]; tensor var_3173 = const()[name = string("op_3173"), val = tensor([1, -1, 1024, 128])]; tensor key_states_19_cast_fp16 = reshape(shape = var_3173, x = x_89_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor x_93_axes_0 = const()[name = string("x_93_axes_0"), val = tensor([1])]; tensor x_93_cast_fp16 = expand_dims(axes = x_93_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_93_cast_fp16")]; tensor var_3181 = const()[name = string("op_3181"), val = tensor([1, 4, 1, 1])]; tensor x_95_cast_fp16 = tile(reps = var_3181, x = x_93_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_3193 = const()[name = string("op_3193"), val = tensor([1, -1, 1024, 128])]; tensor value_states_27_cast_fp16 = reshape(shape = var_3193, x = x_95_cast_fp16)[name = string("value_states_27_cast_fp16")]; bool var_3208_transpose_x_1 = const()[name = string("op_3208_transpose_x_1"), val = bool(false)]; bool var_3208_transpose_y_1 = const()[name = string("op_3208_transpose_y_1"), val = bool(true)]; tensor var_3208 = matmul(transpose_x = var_3208_transpose_x_1, transpose_y = var_3208_transpose_y_1, x = query_states_17, y = key_states_19_cast_fp16)[name = string("op_3208")]; fp16 var_3209_to_fp16 = const()[name = string("op_3209_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_3208, y = var_3209_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_3244 = const()[name = string("op_3244"), val = int32(-1)]; tensor attn_weights_29_cast_fp16 = softmax(axis = var_3244, x = attn_weights_27_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = attn_weights_29_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_41_cast_fp16")]; tensor var_3255_perm_0 = const()[name = string("op_3255_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3259 = const()[name = string("op_3259"), val = tensor([1, 1, 4096])]; tensor var_3255_cast_fp16 = transpose(perm = var_3255_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_82")]; tensor attn_output_45_cast_fp16 = reshape(shape = var_3259, x = var_3255_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_3264 = const()[name = string("op_3264"), val = tensor([0, 2, 1])]; string var_3280_pad_type_0 = const()[name = string("op_3280_pad_type_0"), val = string("valid")]; int32 var_3280_groups_0 = const()[name = string("op_3280_groups_0"), val = int32(1)]; tensor var_3280_strides_0 = const()[name = string("op_3280_strides_0"), val = tensor([1])]; tensor var_3280_pad_0 = const()[name = string("op_3280_pad_0"), val = tensor([0, 0])]; tensor var_3280_dilations_0 = const()[name = string("op_3280_dilations_0"), val = tensor([1])]; tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875858880))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3265_cast_fp16 = transpose(perm = var_3264, x = attn_output_45_cast_fp16)[name = string("transpose_81")]; tensor var_3280_cast_fp16 = conv(dilations = var_3280_dilations_0, groups = var_3280_groups_0, pad = var_3280_pad_0, pad_type = var_3280_pad_type_0, strides = var_3280_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3265_cast_fp16)[name = string("op_3280_cast_fp16")]; tensor var_3284 = const()[name = string("op_3284"), val = tensor([0, 2, 1])]; tensor attn_output_49_cast_fp16 = transpose(perm = var_3284, x = var_3280_cast_fp16)[name = string("transpose_80")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_39_axes_0 = const()[name = string("mean_39_axes_0"), val = tensor([-1])]; bool mean_39_keep_dims_0 = const()[name = string("mean_39_keep_dims_0"), val = bool(true)]; tensor mean_39_cast_fp16 = reduce_mean(axes = mean_39_axes_0, keep_dims = mean_39_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_39_cast_fp16")]; tensor input_83_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_39_cast_fp16)[name = string("input_83_cast_fp16")]; tensor var_3303_axes_0 = const()[name = string("op_3303_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875869184)))]; fp16 var_3291_to_fp16 = const()[name = string("op_3291_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3303_cast_fp16 = layer_norm(axes = var_3303_axes_0, epsilon = var_3291_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("op_3303_cast_fp16")]; tensor var_3317 = const()[name = string("op_3317"), val = tensor([0, 2, 1])]; tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; tensor var_3318 = transpose(perm = var_3317, x = var_3303_cast_fp16)[name = string("transpose_79")]; tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_3318)[name = string("input_85")]; string input_87_pad_type_0 = const()[name = string("input_87_pad_type_0"), val = string("valid")]; tensor input_87_strides_0 = const()[name = string("input_87_strides_0"), val = tensor([1, 1])]; tensor input_87_pad_0 = const()[name = string("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_87_dilations_0 = const()[name = string("input_87_dilations_0"), val = tensor([1, 1])]; int32 input_87_groups_0 = const()[name = string("input_87_groups_0"), val = int32(1)]; tensor input_87 = conv(dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_85)[name = string("input_87")]; string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_85)[name = string("b_9")]; tensor c_9 = silu(x = input_87)[name = string("c_9")]; tensor input_89 = mul(x = c_9, y = b_9)[name = string("input_89")]; string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; tensor e_9 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_89)[name = string("e_9")]; tensor var_3340_axes_0 = const()[name = string("op_3340_axes_0"), val = tensor([2])]; tensor var_3340 = squeeze(axes = var_3340_axes_0, x = e_9)[name = string("op_3340")]; tensor var_3341 = const()[name = string("op_3341"), val = tensor([0, 2, 1])]; tensor var_3342 = transpose(perm = var_3341, x = var_3340)[name = string("transpose_78")]; tensor hidden_states_31_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_3342)[name = string("hidden_states_31_cast_fp16")]; tensor mean_41_axes_0 = const()[name = string("mean_41_axes_0"), val = tensor([-1])]; bool mean_41_keep_dims_0 = const()[name = string("mean_41_keep_dims_0"), val = bool(true)]; tensor mean_41_cast_fp16 = reduce_mean(axes = mean_41_axes_0, keep_dims = mean_41_keep_dims_0, x = hidden_states_31_cast_fp16)[name = string("mean_41_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_31_cast_fp16, y = mean_41_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_3360_axes_0 = const()[name = string("op_3360_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875874368)))]; fp16 var_3348_to_fp16 = const()[name = string("op_3348_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3360_cast_fp16 = layer_norm(axes = var_3360_axes_0, epsilon = var_3348_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_3360_cast_fp16")]; tensor var_3366 = const()[name = string("op_3366"), val = tensor([0, 2, 1])]; tensor var_3369_axes_0 = const()[name = string("op_3369_axes_0"), val = tensor([2])]; tensor var_3367 = transpose(perm = var_3366, x = var_3360_cast_fp16)[name = string("transpose_77")]; tensor var_3369 = expand_dims(axes = var_3369_axes_0, x = var_3367)[name = string("op_3369")]; string var_3385_pad_type_0 = const()[name = string("op_3385_pad_type_0"), val = string("valid")]; tensor var_3385_strides_0 = const()[name = string("op_3385_strides_0"), val = tensor([1, 1])]; tensor var_3385_pad_0 = const()[name = string("op_3385_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3385_dilations_0 = const()[name = string("op_3385_dilations_0"), val = tensor([1, 1])]; int32 var_3385_groups_0 = const()[name = string("op_3385_groups_0"), val = int32(1)]; tensor var_3385 = conv(dilations = var_3385_dilations_0, groups = var_3385_groups_0, pad = var_3385_pad_0, pad_type = var_3385_pad_type_0, strides = var_3385_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_3369)[name = string("op_3385")]; tensor var_3390 = const()[name = string("op_3390"), val = tensor([1, 32, 1, 128])]; tensor var_3391 = reshape(shape = var_3390, x = var_3385)[name = string("op_3391")]; string var_3407_pad_type_0 = const()[name = string("op_3407_pad_type_0"), val = string("valid")]; tensor var_3407_strides_0 = const()[name = string("op_3407_strides_0"), val = tensor([1, 1])]; tensor var_3407_pad_0 = const()[name = string("op_3407_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3407_dilations_0 = const()[name = string("op_3407_dilations_0"), val = tensor([1, 1])]; int32 var_3407_groups_0 = const()[name = string("op_3407_groups_0"), val = int32(1)]; tensor var_3407 = conv(dilations = var_3407_dilations_0, groups = var_3407_groups_0, pad = var_3407_pad_0, pad_type = var_3407_pad_type_0, strides = var_3407_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_3369)[name = string("op_3407")]; tensor var_3412 = const()[name = string("op_3412"), val = tensor([1, 8, 1, 128])]; tensor var_3413 = reshape(shape = var_3412, x = var_3407)[name = string("op_3413")]; string var_3429_pad_type_0 = const()[name = string("op_3429_pad_type_0"), val = string("valid")]; tensor var_3429_strides_0 = const()[name = string("op_3429_strides_0"), val = tensor([1, 1])]; tensor var_3429_pad_0 = const()[name = string("op_3429_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3429_dilations_0 = const()[name = string("op_3429_dilations_0"), val = tensor([1, 1])]; int32 var_3429_groups_0 = const()[name = string("op_3429_groups_0"), val = int32(1)]; tensor var_3429 = conv(dilations = var_3429_dilations_0, groups = var_3429_groups_0, pad = var_3429_pad_0, pad_type = var_3429_pad_type_0, strides = var_3429_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_3369)[name = string("op_3429")]; tensor var_3434 = const()[name = string("op_3434"), val = tensor([1, 8, 1, 128])]; tensor var_3435 = reshape(shape = var_3434, x = var_3429)[name = string("op_3435")]; tensor mean_43_axes_0 = const()[name = string("mean_43_axes_0"), val = tensor([-1])]; bool mean_43_keep_dims_0 = const()[name = string("mean_43_keep_dims_0"), val = bool(true)]; tensor mean_43 = reduce_mean(axes = mean_43_axes_0, keep_dims = mean_43_keep_dims_0, x = var_3391)[name = string("mean_43")]; tensor input_95 = sub(x = var_3391, y = mean_43)[name = string("input_95")]; tensor var_3456_axes_0 = const()[name = string("op_3456_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875879552)))]; fp16 var_3444_to_fp16 = const()[name = string("op_3444_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3456_cast_fp16 = layer_norm(axes = var_3456_axes_0, epsilon = var_3444_to_fp16, gamma = model_model_layers_5_self_attn_q_norm_weight_to_fp16, x = input_95)[name = string("op_3456_cast_fp16")]; tensor mean_45_axes_0 = const()[name = string("mean_45_axes_0"), val = tensor([-1])]; bool mean_45_keep_dims_0 = const()[name = string("mean_45_keep_dims_0"), val = bool(true)]; tensor mean_45 = reduce_mean(axes = mean_45_axes_0, keep_dims = mean_45_keep_dims_0, x = var_3413)[name = string("mean_45")]; tensor input_97 = sub(x = var_3413, y = mean_45)[name = string("input_97")]; tensor var_3474_axes_0 = const()[name = string("op_3474_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875879872)))]; fp16 var_3462_to_fp16 = const()[name = string("op_3462_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3474_cast_fp16 = layer_norm(axes = var_3474_axes_0, epsilon = var_3462_to_fp16, gamma = model_model_layers_5_self_attn_k_norm_weight_to_fp16, x = input_97)[name = string("op_3474_cast_fp16")]; tensor var_3477 = mul(x = var_3456_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3477")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_3456_cast_fp16)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_3456_cast_fp16)[name = string("x2_21")]; fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; tensor var_3498 = mul(x = x2_21, y = const_95_promoted)[name = string("op_3498")]; int32 var_3500 = const()[name = string("op_3500"), val = int32(-1)]; bool var_3501_interleave_0 = const()[name = string("op_3501_interleave_0"), val = bool(false)]; tensor var_3501 = concat(axis = var_3500, interleave = var_3501_interleave_0, values = (var_3498, x1_21))[name = string("op_3501")]; tensor var_3502 = mul(x = var_3501, y = sin_1_cast_fp16)[name = string("op_3502")]; tensor query_states_21 = add(x = var_3477, y = var_3502)[name = string("query_states_21")]; tensor var_3505 = mul(x = var_3474_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3505")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_3474_cast_fp16)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_3474_cast_fp16)[name = string("x2_23")]; fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; tensor var_3526 = mul(x = x2_23, y = const_98_promoted)[name = string("op_3526")]; int32 var_3528 = const()[name = string("op_3528"), val = int32(-1)]; bool var_3529_interleave_0 = const()[name = string("op_3529_interleave_0"), val = bool(false)]; tensor var_3529 = concat(axis = var_3528, interleave = var_3529_interleave_0, values = (var_3526, x1_23))[name = string("op_3529")]; tensor var_3530 = mul(x = var_3529, y = sin_1_cast_fp16)[name = string("op_3530")]; tensor key_states_21 = add(x = var_3505, y = var_3530)[name = string("key_states_21")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_1195, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = key_states_21, x = coreml_update_state_45)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; tensor coreml_update_state_46 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([41])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([42])]; int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_1195, concat_47_values3_0))[name = string("concat_47")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_3435, x = coreml_update_state_46)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; tensor coreml_update_state_47 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; tensor var_3585_begin_0 = const()[name = string("op_3585_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_3585_end_0 = const()[name = string("op_3585_end_0"), val = tensor([6, 8, 1024, 128])]; tensor var_3585_end_mask_0 = const()[name = string("op_3585_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3585_cast_fp16 = slice_by_index(begin = var_3585_begin_0, end = var_3585_end_0, end_mask = var_3585_end_mask_0, x = coreml_update_state_47)[name = string("op_3585_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_3585_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_3592_begin_0 = const()[name = string("op_3592_begin_0"), val = tensor([41, 0, 0, 0])]; tensor var_3592_end_0 = const()[name = string("op_3592_end_0"), val = tensor([42, 8, 1024, 128])]; tensor var_3592_end_mask_0 = const()[name = string("op_3592_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3592_cast_fp16 = slice_by_index(begin = var_3592_begin_0, end = var_3592_end_0, end_mask = var_3592_end_mask_0, x = coreml_update_state_47)[name = string("op_3592_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_3592_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_107_axes_0 = const()[name = string("x_107_axes_0"), val = tensor([1])]; tensor x_107_cast_fp16 = expand_dims(axes = x_107_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_107_cast_fp16")]; tensor var_3629 = const()[name = string("op_3629"), val = tensor([1, 4, 1, 1])]; tensor x_109_cast_fp16 = tile(reps = var_3629, x = x_107_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_3641 = const()[name = string("op_3641"), val = tensor([1, -1, 1024, 128])]; tensor key_states_23_cast_fp16 = reshape(shape = var_3641, x = x_109_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor x_113_axes_0 = const()[name = string("x_113_axes_0"), val = tensor([1])]; tensor x_113_cast_fp16 = expand_dims(axes = x_113_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_113_cast_fp16")]; tensor var_3649 = const()[name = string("op_3649"), val = tensor([1, 4, 1, 1])]; tensor x_115_cast_fp16 = tile(reps = var_3649, x = x_113_cast_fp16)[name = string("x_115_cast_fp16")]; tensor var_3661 = const()[name = string("op_3661"), val = tensor([1, -1, 1024, 128])]; tensor value_states_33_cast_fp16 = reshape(shape = var_3661, x = x_115_cast_fp16)[name = string("value_states_33_cast_fp16")]; bool var_3676_transpose_x_1 = const()[name = string("op_3676_transpose_x_1"), val = bool(false)]; bool var_3676_transpose_y_1 = const()[name = string("op_3676_transpose_y_1"), val = bool(true)]; tensor var_3676 = matmul(transpose_x = var_3676_transpose_x_1, transpose_y = var_3676_transpose_y_1, x = query_states_21, y = key_states_23_cast_fp16)[name = string("op_3676")]; fp16 var_3677_to_fp16 = const()[name = string("op_3677_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_31_cast_fp16 = mul(x = var_3676, y = var_3677_to_fp16)[name = string("attn_weights_31_cast_fp16")]; tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; int32 var_3712 = const()[name = string("op_3712"), val = int32(-1)]; tensor attn_weights_35_cast_fp16 = softmax(axis = var_3712, x = attn_weights_33_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; bool attn_output_51_transpose_x_0 = const()[name = string("attn_output_51_transpose_x_0"), val = bool(false)]; bool attn_output_51_transpose_y_0 = const()[name = string("attn_output_51_transpose_y_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = matmul(transpose_x = attn_output_51_transpose_x_0, transpose_y = attn_output_51_transpose_y_0, x = attn_weights_35_cast_fp16, y = value_states_33_cast_fp16)[name = string("attn_output_51_cast_fp16")]; tensor var_3723_perm_0 = const()[name = string("op_3723_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3727 = const()[name = string("op_3727"), val = tensor([1, 1, 4096])]; tensor var_3723_cast_fp16 = transpose(perm = var_3723_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_76")]; tensor attn_output_55_cast_fp16 = reshape(shape = var_3727, x = var_3723_cast_fp16)[name = string("attn_output_55_cast_fp16")]; tensor var_3732 = const()[name = string("op_3732"), val = tensor([0, 2, 1])]; string var_3748_pad_type_0 = const()[name = string("op_3748_pad_type_0"), val = string("valid")]; int32 var_3748_groups_0 = const()[name = string("op_3748_groups_0"), val = int32(1)]; tensor var_3748_strides_0 = const()[name = string("op_3748_strides_0"), val = tensor([1])]; tensor var_3748_pad_0 = const()[name = string("op_3748_pad_0"), val = tensor([0, 0])]; tensor var_3748_dilations_0 = const()[name = string("op_3748_dilations_0"), val = tensor([1])]; tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875880192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881123136))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3733_cast_fp16 = transpose(perm = var_3732, x = attn_output_55_cast_fp16)[name = string("transpose_75")]; tensor var_3748_cast_fp16 = conv(dilations = var_3748_dilations_0, groups = var_3748_groups_0, pad = var_3748_pad_0, pad_type = var_3748_pad_type_0, strides = var_3748_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_3733_cast_fp16)[name = string("op_3748_cast_fp16")]; tensor var_3752 = const()[name = string("op_3752"), val = tensor([0, 2, 1])]; tensor attn_output_59_cast_fp16 = transpose(perm = var_3752, x = var_3748_cast_fp16)[name = string("transpose_74")]; tensor hidden_states_35_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor mean_47_axes_0 = const()[name = string("mean_47_axes_0"), val = tensor([-1])]; bool mean_47_keep_dims_0 = const()[name = string("mean_47_keep_dims_0"), val = bool(true)]; tensor mean_47_cast_fp16 = reduce_mean(axes = mean_47_axes_0, keep_dims = mean_47_keep_dims_0, x = hidden_states_35_cast_fp16)[name = string("mean_47_cast_fp16")]; tensor input_101_cast_fp16 = sub(x = hidden_states_35_cast_fp16, y = mean_47_cast_fp16)[name = string("input_101_cast_fp16")]; tensor var_3771_axes_0 = const()[name = string("op_3771_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881133440)))]; fp16 var_3759_to_fp16 = const()[name = string("op_3759_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3771_cast_fp16 = layer_norm(axes = var_3771_axes_0, epsilon = var_3759_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_101_cast_fp16)[name = string("op_3771_cast_fp16")]; tensor var_3785 = const()[name = string("op_3785"), val = tensor([0, 2, 1])]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; tensor var_3786 = transpose(perm = var_3785, x = var_3771_cast_fp16)[name = string("transpose_73")]; tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_3786)[name = string("input_103")]; string input_105_pad_type_0 = const()[name = string("input_105_pad_type_0"), val = string("valid")]; tensor input_105_strides_0 = const()[name = string("input_105_strides_0"), val = tensor([1, 1])]; tensor input_105_pad_0 = const()[name = string("input_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_105_dilations_0 = const()[name = string("input_105_dilations_0"), val = tensor([1, 1])]; int32 input_105_groups_0 = const()[name = string("input_105_groups_0"), val = int32(1)]; tensor input_105 = conv(dilations = input_105_dilations_0, groups = input_105_groups_0, pad = input_105_pad_0, pad_type = input_105_pad_type_0, strides = input_105_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_103)[name = string("input_105")]; string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_103)[name = string("b_11")]; tensor c_11 = silu(x = input_105)[name = string("c_11")]; tensor input_107 = mul(x = c_11, y = b_11)[name = string("input_107")]; string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; tensor e_11 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_107)[name = string("e_11")]; tensor var_3808_axes_0 = const()[name = string("op_3808_axes_0"), val = tensor([2])]; tensor var_3808 = squeeze(axes = var_3808_axes_0, x = e_11)[name = string("op_3808")]; tensor var_3809 = const()[name = string("op_3809"), val = tensor([0, 2, 1])]; tensor var_3810 = transpose(perm = var_3809, x = var_3808)[name = string("transpose_72")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = var_3810)[name = string("hidden_states_37_cast_fp16")]; tensor mean_49_axes_0 = const()[name = string("mean_49_axes_0"), val = tensor([-1])]; bool mean_49_keep_dims_0 = const()[name = string("mean_49_keep_dims_0"), val = bool(true)]; tensor mean_49_cast_fp16 = reduce_mean(axes = mean_49_axes_0, keep_dims = mean_49_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_49_cast_fp16")]; tensor input_109_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_49_cast_fp16)[name = string("input_109_cast_fp16")]; tensor var_3828_axes_0 = const()[name = string("op_3828_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881138624)))]; fp16 var_3816_to_fp16 = const()[name = string("op_3816_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3828_cast_fp16 = layer_norm(axes = var_3828_axes_0, epsilon = var_3816_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_109_cast_fp16)[name = string("op_3828_cast_fp16")]; tensor var_3834 = const()[name = string("op_3834"), val = tensor([0, 2, 1])]; tensor var_3837_axes_0 = const()[name = string("op_3837_axes_0"), val = tensor([2])]; tensor var_3835 = transpose(perm = var_3834, x = var_3828_cast_fp16)[name = string("transpose_71")]; tensor var_3837 = expand_dims(axes = var_3837_axes_0, x = var_3835)[name = string("op_3837")]; string var_3853_pad_type_0 = const()[name = string("op_3853_pad_type_0"), val = string("valid")]; tensor var_3853_strides_0 = const()[name = string("op_3853_strides_0"), val = tensor([1, 1])]; tensor var_3853_pad_0 = const()[name = string("op_3853_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3853_dilations_0 = const()[name = string("op_3853_dilations_0"), val = tensor([1, 1])]; int32 var_3853_groups_0 = const()[name = string("op_3853_groups_0"), val = int32(1)]; tensor var_3853 = conv(dilations = var_3853_dilations_0, groups = var_3853_groups_0, pad = var_3853_pad_0, pad_type = var_3853_pad_type_0, strides = var_3853_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_3837)[name = string("op_3853")]; tensor var_3858 = const()[name = string("op_3858"), val = tensor([1, 32, 1, 128])]; tensor var_3859 = reshape(shape = var_3858, x = var_3853)[name = string("op_3859")]; string var_3875_pad_type_0 = const()[name = string("op_3875_pad_type_0"), val = string("valid")]; tensor var_3875_strides_0 = const()[name = string("op_3875_strides_0"), val = tensor([1, 1])]; tensor var_3875_pad_0 = const()[name = string("op_3875_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3875_dilations_0 = const()[name = string("op_3875_dilations_0"), val = tensor([1, 1])]; int32 var_3875_groups_0 = const()[name = string("op_3875_groups_0"), val = int32(1)]; tensor var_3875 = conv(dilations = var_3875_dilations_0, groups = var_3875_groups_0, pad = var_3875_pad_0, pad_type = var_3875_pad_type_0, strides = var_3875_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_3837)[name = string("op_3875")]; tensor var_3880 = const()[name = string("op_3880"), val = tensor([1, 8, 1, 128])]; tensor var_3881 = reshape(shape = var_3880, x = var_3875)[name = string("op_3881")]; string var_3897_pad_type_0 = const()[name = string("op_3897_pad_type_0"), val = string("valid")]; tensor var_3897_strides_0 = const()[name = string("op_3897_strides_0"), val = tensor([1, 1])]; tensor var_3897_pad_0 = const()[name = string("op_3897_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3897_dilations_0 = const()[name = string("op_3897_dilations_0"), val = tensor([1, 1])]; int32 var_3897_groups_0 = const()[name = string("op_3897_groups_0"), val = int32(1)]; tensor var_3897 = conv(dilations = var_3897_dilations_0, groups = var_3897_groups_0, pad = var_3897_pad_0, pad_type = var_3897_pad_type_0, strides = var_3897_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_3837)[name = string("op_3897")]; tensor var_3902 = const()[name = string("op_3902"), val = tensor([1, 8, 1, 128])]; tensor var_3903 = reshape(shape = var_3902, x = var_3897)[name = string("op_3903")]; tensor mean_51_axes_0 = const()[name = string("mean_51_axes_0"), val = tensor([-1])]; bool mean_51_keep_dims_0 = const()[name = string("mean_51_keep_dims_0"), val = bool(true)]; tensor mean_51 = reduce_mean(axes = mean_51_axes_0, keep_dims = mean_51_keep_dims_0, x = var_3859)[name = string("mean_51")]; tensor input_113 = sub(x = var_3859, y = mean_51)[name = string("input_113")]; tensor var_3924_axes_0 = const()[name = string("op_3924_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881143808)))]; fp16 var_3912_to_fp16 = const()[name = string("op_3912_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3924_cast_fp16 = layer_norm(axes = var_3924_axes_0, epsilon = var_3912_to_fp16, gamma = model_model_layers_6_self_attn_q_norm_weight_to_fp16, x = input_113)[name = string("op_3924_cast_fp16")]; tensor mean_53_axes_0 = const()[name = string("mean_53_axes_0"), val = tensor([-1])]; bool mean_53_keep_dims_0 = const()[name = string("mean_53_keep_dims_0"), val = bool(true)]; tensor mean_53 = reduce_mean(axes = mean_53_axes_0, keep_dims = mean_53_keep_dims_0, x = var_3881)[name = string("mean_53")]; tensor input_115 = sub(x = var_3881, y = mean_53)[name = string("input_115")]; tensor var_3942_axes_0 = const()[name = string("op_3942_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881144128)))]; fp16 var_3930_to_fp16 = const()[name = string("op_3930_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3942_cast_fp16 = layer_norm(axes = var_3942_axes_0, epsilon = var_3930_to_fp16, gamma = model_model_layers_6_self_attn_k_norm_weight_to_fp16, x = input_115)[name = string("op_3942_cast_fp16")]; tensor var_3945 = mul(x = var_3924_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3945")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_3924_cast_fp16)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_3924_cast_fp16)[name = string("x2_25")]; fp16 const_113_promoted = const()[name = string("const_113_promoted"), val = fp16(-0x1p+0)]; tensor var_3966 = mul(x = x2_25, y = const_113_promoted)[name = string("op_3966")]; int32 var_3968 = const()[name = string("op_3968"), val = int32(-1)]; bool var_3969_interleave_0 = const()[name = string("op_3969_interleave_0"), val = bool(false)]; tensor var_3969 = concat(axis = var_3968, interleave = var_3969_interleave_0, values = (var_3966, x1_25))[name = string("op_3969")]; tensor var_3970 = mul(x = var_3969, y = sin_1_cast_fp16)[name = string("op_3970")]; tensor query_states_25 = add(x = var_3945, y = var_3970)[name = string("query_states_25")]; tensor var_3973 = mul(x = var_3942_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3973")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_3942_cast_fp16)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_3942_cast_fp16)[name = string("x2_27")]; fp16 const_116_promoted = const()[name = string("const_116_promoted"), val = fp16(-0x1p+0)]; tensor var_3994 = mul(x = x2_27, y = const_116_promoted)[name = string("op_3994")]; int32 var_3996 = const()[name = string("op_3996"), val = int32(-1)]; bool var_3997_interleave_0 = const()[name = string("op_3997_interleave_0"), val = bool(false)]; tensor var_3997 = concat(axis = var_3996, interleave = var_3997_interleave_0, values = (var_3994, x1_27))[name = string("op_3997")]; tensor var_3998 = mul(x = var_3997, y = sin_1_cast_fp16)[name = string("op_3998")]; tensor key_states_25 = add(x = var_3973, y = var_3998)[name = string("key_states_25")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_1195, concat_51_values3_0))[name = string("concat_51")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = key_states_25, x = coreml_update_state_47)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_48 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([42])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([43])]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_1195, concat_55_values3_0))[name = string("concat_55")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_3903, x = coreml_update_state_48)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_49 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; tensor var_4053_begin_0 = const()[name = string("op_4053_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_4053_end_0 = const()[name = string("op_4053_end_0"), val = tensor([7, 8, 1024, 128])]; tensor var_4053_end_mask_0 = const()[name = string("op_4053_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4053_cast_fp16 = slice_by_index(begin = var_4053_begin_0, end = var_4053_end_0, end_mask = var_4053_end_mask_0, x = coreml_update_state_49)[name = string("op_4053_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_4053_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_4060_begin_0 = const()[name = string("op_4060_begin_0"), val = tensor([42, 0, 0, 0])]; tensor var_4060_end_0 = const()[name = string("op_4060_end_0"), val = tensor([43, 8, 1024, 128])]; tensor var_4060_end_mask_0 = const()[name = string("op_4060_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4060_cast_fp16 = slice_by_index(begin = var_4060_begin_0, end = var_4060_end_0, end_mask = var_4060_end_mask_0, x = coreml_update_state_49)[name = string("op_4060_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_4060_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_127_axes_0 = const()[name = string("x_127_axes_0"), val = tensor([1])]; tensor x_127_cast_fp16 = expand_dims(axes = x_127_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_4097 = const()[name = string("op_4097"), val = tensor([1, 4, 1, 1])]; tensor x_129_cast_fp16 = tile(reps = var_4097, x = x_127_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_4109 = const()[name = string("op_4109"), val = tensor([1, -1, 1024, 128])]; tensor key_states_27_cast_fp16 = reshape(shape = var_4109, x = x_129_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_133_axes_0 = const()[name = string("x_133_axes_0"), val = tensor([1])]; tensor x_133_cast_fp16 = expand_dims(axes = x_133_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_4117 = const()[name = string("op_4117"), val = tensor([1, 4, 1, 1])]; tensor x_135_cast_fp16 = tile(reps = var_4117, x = x_133_cast_fp16)[name = string("x_135_cast_fp16")]; tensor var_4129 = const()[name = string("op_4129"), val = tensor([1, -1, 1024, 128])]; tensor value_states_39_cast_fp16 = reshape(shape = var_4129, x = x_135_cast_fp16)[name = string("value_states_39_cast_fp16")]; bool var_4144_transpose_x_1 = const()[name = string("op_4144_transpose_x_1"), val = bool(false)]; bool var_4144_transpose_y_1 = const()[name = string("op_4144_transpose_y_1"), val = bool(true)]; tensor var_4144 = matmul(transpose_x = var_4144_transpose_x_1, transpose_y = var_4144_transpose_y_1, x = query_states_25, y = key_states_27_cast_fp16)[name = string("op_4144")]; fp16 var_4145_to_fp16 = const()[name = string("op_4145_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_4144, y = var_4145_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_4180 = const()[name = string("op_4180"), val = int32(-1)]; tensor attn_weights_41_cast_fp16 = softmax(axis = var_4180, x = attn_weights_39_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_41_cast_fp16, y = value_states_39_cast_fp16)[name = string("attn_output_61_cast_fp16")]; tensor var_4191_perm_0 = const()[name = string("op_4191_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4195 = const()[name = string("op_4195"), val = tensor([1, 1, 4096])]; tensor var_4191_cast_fp16 = transpose(perm = var_4191_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_70")]; tensor attn_output_65_cast_fp16 = reshape(shape = var_4195, x = var_4191_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_4200 = const()[name = string("op_4200"), val = tensor([0, 2, 1])]; string var_4216_pad_type_0 = const()[name = string("op_4216_pad_type_0"), val = string("valid")]; int32 var_4216_groups_0 = const()[name = string("op_4216_groups_0"), val = int32(1)]; tensor var_4216_strides_0 = const()[name = string("op_4216_strides_0"), val = tensor([1])]; tensor var_4216_pad_0 = const()[name = string("op_4216_pad_0"), val = tensor([0, 0])]; tensor var_4216_dilations_0 = const()[name = string("op_4216_dilations_0"), val = tensor([1])]; tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881144448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886387392))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4201_cast_fp16 = transpose(perm = var_4200, x = attn_output_65_cast_fp16)[name = string("transpose_69")]; tensor var_4216_cast_fp16 = conv(dilations = var_4216_dilations_0, groups = var_4216_groups_0, pad = var_4216_pad_0, pad_type = var_4216_pad_type_0, strides = var_4216_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4201_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor var_4220 = const()[name = string("op_4220"), val = tensor([0, 2, 1])]; tensor attn_output_69_cast_fp16 = transpose(perm = var_4220, x = var_4216_cast_fp16)[name = string("transpose_68")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor mean_55_axes_0 = const()[name = string("mean_55_axes_0"), val = tensor([-1])]; bool mean_55_keep_dims_0 = const()[name = string("mean_55_keep_dims_0"), val = bool(true)]; tensor mean_55_cast_fp16 = reduce_mean(axes = mean_55_axes_0, keep_dims = mean_55_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_55_cast_fp16")]; tensor input_119_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_55_cast_fp16)[name = string("input_119_cast_fp16")]; tensor var_4239_axes_0 = const()[name = string("op_4239_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886397696)))]; fp16 var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4239_cast_fp16 = layer_norm(axes = var_4239_axes_0, epsilon = var_4227_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_4239_cast_fp16")]; tensor var_4253 = const()[name = string("op_4253"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_4254 = transpose(perm = var_4253, x = var_4239_cast_fp16)[name = string("transpose_67")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_4254)[name = string("input_121")]; string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_121)[name = string("b_13")]; tensor c_13 = silu(x = input_123)[name = string("c_13")]; tensor input_125 = mul(x = c_13, y = b_13)[name = string("input_125")]; string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; tensor e_13 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_125)[name = string("e_13")]; tensor var_4276_axes_0 = const()[name = string("op_4276_axes_0"), val = tensor([2])]; tensor var_4276 = squeeze(axes = var_4276_axes_0, x = e_13)[name = string("op_4276")]; tensor var_4277 = const()[name = string("op_4277"), val = tensor([0, 2, 1])]; tensor var_4278 = transpose(perm = var_4277, x = var_4276)[name = string("transpose_66")]; tensor hidden_states_43_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = var_4278)[name = string("hidden_states_43_cast_fp16")]; tensor mean_57_axes_0 = const()[name = string("mean_57_axes_0"), val = tensor([-1])]; bool mean_57_keep_dims_0 = const()[name = string("mean_57_keep_dims_0"), val = bool(true)]; tensor mean_57_cast_fp16 = reduce_mean(axes = mean_57_axes_0, keep_dims = mean_57_keep_dims_0, x = hidden_states_43_cast_fp16)[name = string("mean_57_cast_fp16")]; tensor input_127_cast_fp16 = sub(x = hidden_states_43_cast_fp16, y = mean_57_cast_fp16)[name = string("input_127_cast_fp16")]; tensor var_4296_axes_0 = const()[name = string("op_4296_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886402880)))]; fp16 var_4284_to_fp16 = const()[name = string("op_4284_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4296_cast_fp16 = layer_norm(axes = var_4296_axes_0, epsilon = var_4284_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_127_cast_fp16)[name = string("op_4296_cast_fp16")]; tensor var_4302 = const()[name = string("op_4302"), val = tensor([0, 2, 1])]; tensor var_4305_axes_0 = const()[name = string("op_4305_axes_0"), val = tensor([2])]; tensor var_4303 = transpose(perm = var_4302, x = var_4296_cast_fp16)[name = string("transpose_65")]; tensor var_4305 = expand_dims(axes = var_4305_axes_0, x = var_4303)[name = string("op_4305")]; string var_4321_pad_type_0 = const()[name = string("op_4321_pad_type_0"), val = string("valid")]; tensor var_4321_strides_0 = const()[name = string("op_4321_strides_0"), val = tensor([1, 1])]; tensor var_4321_pad_0 = const()[name = string("op_4321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4321_dilations_0 = const()[name = string("op_4321_dilations_0"), val = tensor([1, 1])]; int32 var_4321_groups_0 = const()[name = string("op_4321_groups_0"), val = int32(1)]; tensor var_4321 = conv(dilations = var_4321_dilations_0, groups = var_4321_groups_0, pad = var_4321_pad_0, pad_type = var_4321_pad_type_0, strides = var_4321_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_4305)[name = string("op_4321")]; tensor var_4326 = const()[name = string("op_4326"), val = tensor([1, 32, 1, 128])]; tensor var_4327 = reshape(shape = var_4326, x = var_4321)[name = string("op_4327")]; string var_4343_pad_type_0 = const()[name = string("op_4343_pad_type_0"), val = string("valid")]; tensor var_4343_strides_0 = const()[name = string("op_4343_strides_0"), val = tensor([1, 1])]; tensor var_4343_pad_0 = const()[name = string("op_4343_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4343_dilations_0 = const()[name = string("op_4343_dilations_0"), val = tensor([1, 1])]; int32 var_4343_groups_0 = const()[name = string("op_4343_groups_0"), val = int32(1)]; tensor var_4343 = conv(dilations = var_4343_dilations_0, groups = var_4343_groups_0, pad = var_4343_pad_0, pad_type = var_4343_pad_type_0, strides = var_4343_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_4305)[name = string("op_4343")]; tensor var_4348 = const()[name = string("op_4348"), val = tensor([1, 8, 1, 128])]; tensor var_4349 = reshape(shape = var_4348, x = var_4343)[name = string("op_4349")]; string var_4365_pad_type_0 = const()[name = string("op_4365_pad_type_0"), val = string("valid")]; tensor var_4365_strides_0 = const()[name = string("op_4365_strides_0"), val = tensor([1, 1])]; tensor var_4365_pad_0 = const()[name = string("op_4365_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4365_dilations_0 = const()[name = string("op_4365_dilations_0"), val = tensor([1, 1])]; int32 var_4365_groups_0 = const()[name = string("op_4365_groups_0"), val = int32(1)]; tensor var_4365 = conv(dilations = var_4365_dilations_0, groups = var_4365_groups_0, pad = var_4365_pad_0, pad_type = var_4365_pad_type_0, strides = var_4365_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_4305)[name = string("op_4365")]; tensor var_4370 = const()[name = string("op_4370"), val = tensor([1, 8, 1, 128])]; tensor var_4371 = reshape(shape = var_4370, x = var_4365)[name = string("op_4371")]; tensor mean_59_axes_0 = const()[name = string("mean_59_axes_0"), val = tensor([-1])]; bool mean_59_keep_dims_0 = const()[name = string("mean_59_keep_dims_0"), val = bool(true)]; tensor mean_59 = reduce_mean(axes = mean_59_axes_0, keep_dims = mean_59_keep_dims_0, x = var_4327)[name = string("mean_59")]; tensor input_131 = sub(x = var_4327, y = mean_59)[name = string("input_131")]; tensor var_4392_axes_0 = const()[name = string("op_4392_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408064)))]; fp16 var_4380_to_fp16 = const()[name = string("op_4380_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4392_cast_fp16 = layer_norm(axes = var_4392_axes_0, epsilon = var_4380_to_fp16, gamma = model_model_layers_7_self_attn_q_norm_weight_to_fp16, x = input_131)[name = string("op_4392_cast_fp16")]; tensor mean_61_axes_0 = const()[name = string("mean_61_axes_0"), val = tensor([-1])]; bool mean_61_keep_dims_0 = const()[name = string("mean_61_keep_dims_0"), val = bool(true)]; tensor mean_61 = reduce_mean(axes = mean_61_axes_0, keep_dims = mean_61_keep_dims_0, x = var_4349)[name = string("mean_61")]; tensor input_133 = sub(x = var_4349, y = mean_61)[name = string("input_133")]; tensor var_4410_axes_0 = const()[name = string("op_4410_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408384)))]; fp16 var_4398_to_fp16 = const()[name = string("op_4398_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4410_cast_fp16 = layer_norm(axes = var_4410_axes_0, epsilon = var_4398_to_fp16, gamma = model_model_layers_7_self_attn_k_norm_weight_to_fp16, x = input_133)[name = string("op_4410_cast_fp16")]; tensor var_4413 = mul(x = var_4392_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4413")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_4392_cast_fp16)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_4392_cast_fp16)[name = string("x2_29")]; fp16 const_131_promoted = const()[name = string("const_131_promoted"), val = fp16(-0x1p+0)]; tensor var_4434 = mul(x = x2_29, y = const_131_promoted)[name = string("op_4434")]; int32 var_4436 = const()[name = string("op_4436"), val = int32(-1)]; bool var_4437_interleave_0 = const()[name = string("op_4437_interleave_0"), val = bool(false)]; tensor var_4437 = concat(axis = var_4436, interleave = var_4437_interleave_0, values = (var_4434, x1_29))[name = string("op_4437")]; tensor var_4438 = mul(x = var_4437, y = sin_1_cast_fp16)[name = string("op_4438")]; tensor query_states_29 = add(x = var_4413, y = var_4438)[name = string("query_states_29")]; tensor var_4441 = mul(x = var_4410_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4441")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_4410_cast_fp16)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_4410_cast_fp16)[name = string("x2_31")]; fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)]; tensor var_4462 = mul(x = x2_31, y = const_134_promoted)[name = string("op_4462")]; int32 var_4464 = const()[name = string("op_4464"), val = int32(-1)]; bool var_4465_interleave_0 = const()[name = string("op_4465_interleave_0"), val = bool(false)]; tensor var_4465 = concat(axis = var_4464, interleave = var_4465_interleave_0, values = (var_4462, x1_31))[name = string("op_4465")]; tensor var_4466 = mul(x = var_4465, y = sin_1_cast_fp16)[name = string("op_4466")]; tensor key_states_29 = add(x = var_4441, y = var_4466)[name = string("key_states_29")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_1195, concat_59_values3_0))[name = string("concat_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = key_states_29, x = coreml_update_state_49)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_50 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([43])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([44])]; int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_1195, concat_63_values3_0))[name = string("concat_63")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_4371, x = coreml_update_state_50)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_51 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; tensor var_4521_begin_0 = const()[name = string("op_4521_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_4521_end_0 = const()[name = string("op_4521_end_0"), val = tensor([8, 8, 1024, 128])]; tensor var_4521_end_mask_0 = const()[name = string("op_4521_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4521_cast_fp16 = slice_by_index(begin = var_4521_begin_0, end = var_4521_end_0, end_mask = var_4521_end_mask_0, x = coreml_update_state_51)[name = string("op_4521_cast_fp16")]; tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_4521_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; tensor var_4528_begin_0 = const()[name = string("op_4528_begin_0"), val = tensor([43, 0, 0, 0])]; tensor var_4528_end_0 = const()[name = string("op_4528_end_0"), val = tensor([44, 8, 1024, 128])]; tensor var_4528_end_mask_0 = const()[name = string("op_4528_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4528_cast_fp16 = slice_by_index(begin = var_4528_begin_0, end = var_4528_end_0, end_mask = var_4528_end_mask_0, x = coreml_update_state_51)[name = string("op_4528_cast_fp16")]; tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_4528_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; tensor x_147_axes_0 = const()[name = string("x_147_axes_0"), val = tensor([1])]; tensor x_147_cast_fp16 = expand_dims(axes = x_147_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_147_cast_fp16")]; tensor var_4565 = const()[name = string("op_4565"), val = tensor([1, 4, 1, 1])]; tensor x_149_cast_fp16 = tile(reps = var_4565, x = x_147_cast_fp16)[name = string("x_149_cast_fp16")]; tensor var_4577 = const()[name = string("op_4577"), val = tensor([1, -1, 1024, 128])]; tensor key_states_31_cast_fp16 = reshape(shape = var_4577, x = x_149_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor x_153_axes_0 = const()[name = string("x_153_axes_0"), val = tensor([1])]; tensor x_153_cast_fp16 = expand_dims(axes = x_153_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_4585 = const()[name = string("op_4585"), val = tensor([1, 4, 1, 1])]; tensor x_155_cast_fp16 = tile(reps = var_4585, x = x_153_cast_fp16)[name = string("x_155_cast_fp16")]; tensor var_4597 = const()[name = string("op_4597"), val = tensor([1, -1, 1024, 128])]; tensor value_states_45_cast_fp16 = reshape(shape = var_4597, x = x_155_cast_fp16)[name = string("value_states_45_cast_fp16")]; bool var_4612_transpose_x_1 = const()[name = string("op_4612_transpose_x_1"), val = bool(false)]; bool var_4612_transpose_y_1 = const()[name = string("op_4612_transpose_y_1"), val = bool(true)]; tensor var_4612 = matmul(transpose_x = var_4612_transpose_x_1, transpose_y = var_4612_transpose_y_1, x = query_states_29, y = key_states_31_cast_fp16)[name = string("op_4612")]; fp16 var_4613_to_fp16 = const()[name = string("op_4613_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_43_cast_fp16 = mul(x = var_4612, y = var_4613_to_fp16)[name = string("attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = causal_mask)[name = string("attn_weights_45_cast_fp16")]; int32 var_4648 = const()[name = string("op_4648"), val = int32(-1)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_4648, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_71_transpose_x_0 = const()[name = string("attn_output_71_transpose_x_0"), val = bool(false)]; bool attn_output_71_transpose_y_0 = const()[name = string("attn_output_71_transpose_y_0"), val = bool(false)]; tensor attn_output_71_cast_fp16 = matmul(transpose_x = attn_output_71_transpose_x_0, transpose_y = attn_output_71_transpose_y_0, x = attn_weights_47_cast_fp16, y = value_states_45_cast_fp16)[name = string("attn_output_71_cast_fp16")]; tensor var_4659_perm_0 = const()[name = string("op_4659_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4663 = const()[name = string("op_4663"), val = tensor([1, 1, 4096])]; tensor var_4659_cast_fp16 = transpose(perm = var_4659_perm_0, x = attn_output_71_cast_fp16)[name = string("transpose_64")]; tensor attn_output_75_cast_fp16 = reshape(shape = var_4663, x = var_4659_cast_fp16)[name = string("attn_output_75_cast_fp16")]; tensor var_4668 = const()[name = string("op_4668"), val = tensor([0, 2, 1])]; string var_4684_pad_type_0 = const()[name = string("op_4684_pad_type_0"), val = string("valid")]; int32 var_4684_groups_0 = const()[name = string("op_4684_groups_0"), val = int32(1)]; tensor var_4684_strides_0 = const()[name = string("op_4684_strides_0"), val = tensor([1])]; tensor var_4684_pad_0 = const()[name = string("op_4684_pad_0"), val = tensor([0, 0])]; tensor var_4684_dilations_0 = const()[name = string("op_4684_dilations_0"), val = tensor([1])]; tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891651648))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4669_cast_fp16 = transpose(perm = var_4668, x = attn_output_75_cast_fp16)[name = string("transpose_63")]; tensor var_4684_cast_fp16 = conv(dilations = var_4684_dilations_0, groups = var_4684_groups_0, pad = var_4684_pad_0, pad_type = var_4684_pad_type_0, strides = var_4684_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_4669_cast_fp16)[name = string("op_4684_cast_fp16")]; tensor var_4688 = const()[name = string("op_4688"), val = tensor([0, 2, 1])]; tensor attn_output_79_cast_fp16 = transpose(perm = var_4688, x = var_4684_cast_fp16)[name = string("transpose_62")]; tensor hidden_states_47_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor mean_63_axes_0 = const()[name = string("mean_63_axes_0"), val = tensor([-1])]; bool mean_63_keep_dims_0 = const()[name = string("mean_63_keep_dims_0"), val = bool(true)]; tensor mean_63_cast_fp16 = reduce_mean(axes = mean_63_axes_0, keep_dims = mean_63_keep_dims_0, x = hidden_states_47_cast_fp16)[name = string("mean_63_cast_fp16")]; tensor input_137_cast_fp16 = sub(x = hidden_states_47_cast_fp16, y = mean_63_cast_fp16)[name = string("input_137_cast_fp16")]; tensor var_4707_axes_0 = const()[name = string("op_4707_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891661952)))]; fp16 var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4707_cast_fp16 = layer_norm(axes = var_4707_axes_0, epsilon = var_4695_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_137_cast_fp16)[name = string("op_4707_cast_fp16")]; tensor var_4721 = const()[name = string("op_4721"), val = tensor([0, 2, 1])]; tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; tensor var_4722 = transpose(perm = var_4721, x = var_4707_cast_fp16)[name = string("transpose_61")]; tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_4722)[name = string("input_139")]; string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; tensor input_141 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_139)[name = string("input_141")]; string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_139)[name = string("b_15")]; tensor c_15 = silu(x = input_141)[name = string("c_15")]; tensor input_143 = mul(x = c_15, y = b_15)[name = string("input_143")]; string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; tensor e_15 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_143)[name = string("e_15")]; tensor var_4744_axes_0 = const()[name = string("op_4744_axes_0"), val = tensor([2])]; tensor var_4744 = squeeze(axes = var_4744_axes_0, x = e_15)[name = string("op_4744")]; tensor var_4745 = const()[name = string("op_4745"), val = tensor([0, 2, 1])]; tensor var_4746 = transpose(perm = var_4745, x = var_4744)[name = string("transpose_60")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = var_4746)[name = string("hidden_states_49_cast_fp16")]; tensor mean_65_axes_0 = const()[name = string("mean_65_axes_0"), val = tensor([-1])]; bool mean_65_keep_dims_0 = const()[name = string("mean_65_keep_dims_0"), val = bool(true)]; tensor mean_65_cast_fp16 = reduce_mean(axes = mean_65_axes_0, keep_dims = mean_65_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_65_cast_fp16")]; tensor input_145_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_65_cast_fp16)[name = string("input_145_cast_fp16")]; tensor var_4764_axes_0 = const()[name = string("op_4764_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891667136)))]; fp16 var_4752_to_fp16 = const()[name = string("op_4752_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4764_cast_fp16 = layer_norm(axes = var_4764_axes_0, epsilon = var_4752_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_145_cast_fp16)[name = string("op_4764_cast_fp16")]; tensor var_4770 = const()[name = string("op_4770"), val = tensor([0, 2, 1])]; tensor var_4773_axes_0 = const()[name = string("op_4773_axes_0"), val = tensor([2])]; tensor var_4771 = transpose(perm = var_4770, x = var_4764_cast_fp16)[name = string("transpose_59")]; tensor var_4773 = expand_dims(axes = var_4773_axes_0, x = var_4771)[name = string("op_4773")]; string var_4789_pad_type_0 = const()[name = string("op_4789_pad_type_0"), val = string("valid")]; tensor var_4789_strides_0 = const()[name = string("op_4789_strides_0"), val = tensor([1, 1])]; tensor var_4789_pad_0 = const()[name = string("op_4789_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4789_dilations_0 = const()[name = string("op_4789_dilations_0"), val = tensor([1, 1])]; int32 var_4789_groups_0 = const()[name = string("op_4789_groups_0"), val = int32(1)]; tensor var_4789 = conv(dilations = var_4789_dilations_0, groups = var_4789_groups_0, pad = var_4789_pad_0, pad_type = var_4789_pad_type_0, strides = var_4789_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_4773)[name = string("op_4789")]; tensor var_4794 = const()[name = string("op_4794"), val = tensor([1, 32, 1, 128])]; tensor var_4795 = reshape(shape = var_4794, x = var_4789)[name = string("op_4795")]; string var_4811_pad_type_0 = const()[name = string("op_4811_pad_type_0"), val = string("valid")]; tensor var_4811_strides_0 = const()[name = string("op_4811_strides_0"), val = tensor([1, 1])]; tensor var_4811_pad_0 = const()[name = string("op_4811_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4811_dilations_0 = const()[name = string("op_4811_dilations_0"), val = tensor([1, 1])]; int32 var_4811_groups_0 = const()[name = string("op_4811_groups_0"), val = int32(1)]; tensor var_4811 = conv(dilations = var_4811_dilations_0, groups = var_4811_groups_0, pad = var_4811_pad_0, pad_type = var_4811_pad_type_0, strides = var_4811_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_4773)[name = string("op_4811")]; tensor var_4816 = const()[name = string("op_4816"), val = tensor([1, 8, 1, 128])]; tensor var_4817 = reshape(shape = var_4816, x = var_4811)[name = string("op_4817")]; string var_4833_pad_type_0 = const()[name = string("op_4833_pad_type_0"), val = string("valid")]; tensor var_4833_strides_0 = const()[name = string("op_4833_strides_0"), val = tensor([1, 1])]; tensor var_4833_pad_0 = const()[name = string("op_4833_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4833_dilations_0 = const()[name = string("op_4833_dilations_0"), val = tensor([1, 1])]; int32 var_4833_groups_0 = const()[name = string("op_4833_groups_0"), val = int32(1)]; tensor var_4833 = conv(dilations = var_4833_dilations_0, groups = var_4833_groups_0, pad = var_4833_pad_0, pad_type = var_4833_pad_type_0, strides = var_4833_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_4773)[name = string("op_4833")]; tensor var_4838 = const()[name = string("op_4838"), val = tensor([1, 8, 1, 128])]; tensor var_4839 = reshape(shape = var_4838, x = var_4833)[name = string("op_4839")]; tensor mean_67_axes_0 = const()[name = string("mean_67_axes_0"), val = tensor([-1])]; bool mean_67_keep_dims_0 = const()[name = string("mean_67_keep_dims_0"), val = bool(true)]; tensor mean_67 = reduce_mean(axes = mean_67_axes_0, keep_dims = mean_67_keep_dims_0, x = var_4795)[name = string("mean_67")]; tensor input_149 = sub(x = var_4795, y = mean_67)[name = string("input_149")]; tensor var_4860_axes_0 = const()[name = string("op_4860_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672320)))]; fp16 var_4848_to_fp16 = const()[name = string("op_4848_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4860_cast_fp16 = layer_norm(axes = var_4860_axes_0, epsilon = var_4848_to_fp16, gamma = model_model_layers_8_self_attn_q_norm_weight_to_fp16, x = input_149)[name = string("op_4860_cast_fp16")]; tensor mean_69_axes_0 = const()[name = string("mean_69_axes_0"), val = tensor([-1])]; bool mean_69_keep_dims_0 = const()[name = string("mean_69_keep_dims_0"), val = bool(true)]; tensor mean_69 = reduce_mean(axes = mean_69_axes_0, keep_dims = mean_69_keep_dims_0, x = var_4817)[name = string("mean_69")]; tensor input_151 = sub(x = var_4817, y = mean_69)[name = string("input_151")]; tensor var_4878_axes_0 = const()[name = string("op_4878_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672640)))]; fp16 var_4866_to_fp16 = const()[name = string("op_4866_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4878_cast_fp16 = layer_norm(axes = var_4878_axes_0, epsilon = var_4866_to_fp16, gamma = model_model_layers_8_self_attn_k_norm_weight_to_fp16, x = input_151)[name = string("op_4878_cast_fp16")]; tensor var_4881 = mul(x = var_4860_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4881")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_4860_cast_fp16)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_4860_cast_fp16)[name = string("x2_33")]; fp16 const_149_promoted = const()[name = string("const_149_promoted"), val = fp16(-0x1p+0)]; tensor var_4902 = mul(x = x2_33, y = const_149_promoted)[name = string("op_4902")]; int32 var_4904 = const()[name = string("op_4904"), val = int32(-1)]; bool var_4905_interleave_0 = const()[name = string("op_4905_interleave_0"), val = bool(false)]; tensor var_4905 = concat(axis = var_4904, interleave = var_4905_interleave_0, values = (var_4902, x1_33))[name = string("op_4905")]; tensor var_4906 = mul(x = var_4905, y = sin_1_cast_fp16)[name = string("op_4906")]; tensor query_states_33 = add(x = var_4881, y = var_4906)[name = string("query_states_33")]; tensor var_4909 = mul(x = var_4878_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4909")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_4878_cast_fp16)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_4878_cast_fp16)[name = string("x2_35")]; fp16 const_152_promoted = const()[name = string("const_152_promoted"), val = fp16(-0x1p+0)]; tensor var_4930 = mul(x = x2_35, y = const_152_promoted)[name = string("op_4930")]; int32 var_4932 = const()[name = string("op_4932"), val = int32(-1)]; bool var_4933_interleave_0 = const()[name = string("op_4933_interleave_0"), val = bool(false)]; tensor var_4933 = concat(axis = var_4932, interleave = var_4933_interleave_0, values = (var_4930, x1_35))[name = string("op_4933")]; tensor var_4934 = mul(x = var_4933, y = sin_1_cast_fp16)[name = string("op_4934")]; tensor key_states_33 = add(x = var_4909, y = var_4934)[name = string("key_states_33")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_1195, concat_67_values3_0))[name = string("concat_67")]; tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = key_states_33, x = coreml_update_state_51)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([44])]; tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([45])]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_1195, concat_71_values3_0))[name = string("concat_71")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = var_4839, x = coreml_update_state_52)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; tensor var_4989_begin_0 = const()[name = string("op_4989_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_4989_end_0 = const()[name = string("op_4989_end_0"), val = tensor([9, 8, 1024, 128])]; tensor var_4989_end_mask_0 = const()[name = string("op_4989_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4989_cast_fp16 = slice_by_index(begin = var_4989_begin_0, end = var_4989_end_0, end_mask = var_4989_end_mask_0, x = coreml_update_state_53)[name = string("op_4989_cast_fp16")]; tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_4989_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; tensor var_4996_begin_0 = const()[name = string("op_4996_begin_0"), val = tensor([44, 0, 0, 0])]; tensor var_4996_end_0 = const()[name = string("op_4996_end_0"), val = tensor([45, 8, 1024, 128])]; tensor var_4996_end_mask_0 = const()[name = string("op_4996_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4996_cast_fp16 = slice_by_index(begin = var_4996_begin_0, end = var_4996_end_0, end_mask = var_4996_end_mask_0, x = coreml_update_state_53)[name = string("op_4996_cast_fp16")]; tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_4996_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; tensor x_167_axes_0 = const()[name = string("x_167_axes_0"), val = tensor([1])]; tensor x_167_cast_fp16 = expand_dims(axes = x_167_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_167_cast_fp16")]; tensor var_5033 = const()[name = string("op_5033"), val = tensor([1, 4, 1, 1])]; tensor x_169_cast_fp16 = tile(reps = var_5033, x = x_167_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_5045 = const()[name = string("op_5045"), val = tensor([1, -1, 1024, 128])]; tensor key_states_35_cast_fp16 = reshape(shape = var_5045, x = x_169_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor x_173_axes_0 = const()[name = string("x_173_axes_0"), val = tensor([1])]; tensor x_173_cast_fp16 = expand_dims(axes = x_173_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_173_cast_fp16")]; tensor var_5053 = const()[name = string("op_5053"), val = tensor([1, 4, 1, 1])]; tensor x_175_cast_fp16 = tile(reps = var_5053, x = x_173_cast_fp16)[name = string("x_175_cast_fp16")]; tensor var_5065 = const()[name = string("op_5065"), val = tensor([1, -1, 1024, 128])]; tensor value_states_51_cast_fp16 = reshape(shape = var_5065, x = x_175_cast_fp16)[name = string("value_states_51_cast_fp16")]; bool var_5080_transpose_x_1 = const()[name = string("op_5080_transpose_x_1"), val = bool(false)]; bool var_5080_transpose_y_1 = const()[name = string("op_5080_transpose_y_1"), val = bool(true)]; tensor var_5080 = matmul(transpose_x = var_5080_transpose_x_1, transpose_y = var_5080_transpose_y_1, x = query_states_33, y = key_states_35_cast_fp16)[name = string("op_5080")]; fp16 var_5081_to_fp16 = const()[name = string("op_5081_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_5080, y = var_5081_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_5116 = const()[name = string("op_5116"), val = int32(-1)]; tensor attn_weights_53_cast_fp16 = softmax(axis = var_5116, x = attn_weights_51_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = attn_weights_53_cast_fp16, y = value_states_51_cast_fp16)[name = string("attn_output_81_cast_fp16")]; tensor var_5127_perm_0 = const()[name = string("op_5127_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5131 = const()[name = string("op_5131"), val = tensor([1, 1, 4096])]; tensor var_5127_cast_fp16 = transpose(perm = var_5127_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_58")]; tensor attn_output_85_cast_fp16 = reshape(shape = var_5131, x = var_5127_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_5136 = const()[name = string("op_5136"), val = tensor([0, 2, 1])]; string var_5152_pad_type_0 = const()[name = string("op_5152_pad_type_0"), val = string("valid")]; int32 var_5152_groups_0 = const()[name = string("op_5152_groups_0"), val = int32(1)]; tensor var_5152_strides_0 = const()[name = string("op_5152_strides_0"), val = tensor([1])]; tensor var_5152_pad_0 = const()[name = string("op_5152_pad_0"), val = tensor([0, 0])]; tensor var_5152_dilations_0 = const()[name = string("op_5152_dilations_0"), val = tensor([1])]; tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896915904))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5137_cast_fp16 = transpose(perm = var_5136, x = attn_output_85_cast_fp16)[name = string("transpose_57")]; tensor var_5152_cast_fp16 = conv(dilations = var_5152_dilations_0, groups = var_5152_groups_0, pad = var_5152_pad_0, pad_type = var_5152_pad_type_0, strides = var_5152_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5137_cast_fp16)[name = string("op_5152_cast_fp16")]; tensor var_5156 = const()[name = string("op_5156"), val = tensor([0, 2, 1])]; tensor attn_output_89_cast_fp16 = transpose(perm = var_5156, x = var_5152_cast_fp16)[name = string("transpose_56")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_71_axes_0 = const()[name = string("mean_71_axes_0"), val = tensor([-1])]; bool mean_71_keep_dims_0 = const()[name = string("mean_71_keep_dims_0"), val = bool(true)]; tensor mean_71_cast_fp16 = reduce_mean(axes = mean_71_axes_0, keep_dims = mean_71_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_71_cast_fp16")]; tensor input_155_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_71_cast_fp16)[name = string("input_155_cast_fp16")]; tensor var_5175_axes_0 = const()[name = string("op_5175_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896926208)))]; fp16 var_5163_to_fp16 = const()[name = string("op_5163_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5175_cast_fp16 = layer_norm(axes = var_5175_axes_0, epsilon = var_5163_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_155_cast_fp16)[name = string("op_5175_cast_fp16")]; tensor var_5189 = const()[name = string("op_5189"), val = tensor([0, 2, 1])]; tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; tensor var_5190 = transpose(perm = var_5189, x = var_5175_cast_fp16)[name = string("transpose_55")]; tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_5190)[name = string("input_157")]; string input_159_pad_type_0 = const()[name = string("input_159_pad_type_0"), val = string("valid")]; tensor input_159_strides_0 = const()[name = string("input_159_strides_0"), val = tensor([1, 1])]; tensor input_159_pad_0 = const()[name = string("input_159_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_159_dilations_0 = const()[name = string("input_159_dilations_0"), val = tensor([1, 1])]; int32 input_159_groups_0 = const()[name = string("input_159_groups_0"), val = int32(1)]; tensor input_159 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_157)[name = string("input_159")]; string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_157)[name = string("b_17")]; tensor c_17 = silu(x = input_159)[name = string("c_17")]; tensor input_161 = mul(x = c_17, y = b_17)[name = string("input_161")]; string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; tensor e_17 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_161)[name = string("e_17")]; tensor var_5212_axes_0 = const()[name = string("op_5212_axes_0"), val = tensor([2])]; tensor var_5212 = squeeze(axes = var_5212_axes_0, x = e_17)[name = string("op_5212")]; tensor var_5213 = const()[name = string("op_5213"), val = tensor([0, 2, 1])]; tensor var_5214 = transpose(perm = var_5213, x = var_5212)[name = string("transpose_54")]; tensor hidden_states_55_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_5214)[name = string("hidden_states_55_cast_fp16")]; tensor mean_73_axes_0 = const()[name = string("mean_73_axes_0"), val = tensor([-1])]; bool mean_73_keep_dims_0 = const()[name = string("mean_73_keep_dims_0"), val = bool(true)]; tensor mean_73_cast_fp16 = reduce_mean(axes = mean_73_axes_0, keep_dims = mean_73_keep_dims_0, x = hidden_states_55_cast_fp16)[name = string("mean_73_cast_fp16")]; tensor input_163_cast_fp16 = sub(x = hidden_states_55_cast_fp16, y = mean_73_cast_fp16)[name = string("input_163_cast_fp16")]; tensor var_5232_axes_0 = const()[name = string("op_5232_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896931392)))]; fp16 var_5220_to_fp16 = const()[name = string("op_5220_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5232_cast_fp16 = layer_norm(axes = var_5232_axes_0, epsilon = var_5220_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_163_cast_fp16)[name = string("op_5232_cast_fp16")]; tensor var_5238 = const()[name = string("op_5238"), val = tensor([0, 2, 1])]; tensor var_5241_axes_0 = const()[name = string("op_5241_axes_0"), val = tensor([2])]; tensor var_5239 = transpose(perm = var_5238, x = var_5232_cast_fp16)[name = string("transpose_53")]; tensor var_5241 = expand_dims(axes = var_5241_axes_0, x = var_5239)[name = string("op_5241")]; string var_5257_pad_type_0 = const()[name = string("op_5257_pad_type_0"), val = string("valid")]; tensor var_5257_strides_0 = const()[name = string("op_5257_strides_0"), val = tensor([1, 1])]; tensor var_5257_pad_0 = const()[name = string("op_5257_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5257_dilations_0 = const()[name = string("op_5257_dilations_0"), val = tensor([1, 1])]; int32 var_5257_groups_0 = const()[name = string("op_5257_groups_0"), val = int32(1)]; tensor var_5257 = conv(dilations = var_5257_dilations_0, groups = var_5257_groups_0, pad = var_5257_pad_0, pad_type = var_5257_pad_type_0, strides = var_5257_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_5241)[name = string("op_5257")]; tensor var_5262 = const()[name = string("op_5262"), val = tensor([1, 32, 1, 128])]; tensor var_5263 = reshape(shape = var_5262, x = var_5257)[name = string("op_5263")]; string var_5279_pad_type_0 = const()[name = string("op_5279_pad_type_0"), val = string("valid")]; tensor var_5279_strides_0 = const()[name = string("op_5279_strides_0"), val = tensor([1, 1])]; tensor var_5279_pad_0 = const()[name = string("op_5279_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5279_dilations_0 = const()[name = string("op_5279_dilations_0"), val = tensor([1, 1])]; int32 var_5279_groups_0 = const()[name = string("op_5279_groups_0"), val = int32(1)]; tensor var_5279 = conv(dilations = var_5279_dilations_0, groups = var_5279_groups_0, pad = var_5279_pad_0, pad_type = var_5279_pad_type_0, strides = var_5279_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_5241)[name = string("op_5279")]; tensor var_5284 = const()[name = string("op_5284"), val = tensor([1, 8, 1, 128])]; tensor var_5285 = reshape(shape = var_5284, x = var_5279)[name = string("op_5285")]; string var_5301_pad_type_0 = const()[name = string("op_5301_pad_type_0"), val = string("valid")]; tensor var_5301_strides_0 = const()[name = string("op_5301_strides_0"), val = tensor([1, 1])]; tensor var_5301_pad_0 = const()[name = string("op_5301_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5301_dilations_0 = const()[name = string("op_5301_dilations_0"), val = tensor([1, 1])]; int32 var_5301_groups_0 = const()[name = string("op_5301_groups_0"), val = int32(1)]; tensor var_5301 = conv(dilations = var_5301_dilations_0, groups = var_5301_groups_0, pad = var_5301_pad_0, pad_type = var_5301_pad_type_0, strides = var_5301_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_5241)[name = string("op_5301")]; tensor var_5306 = const()[name = string("op_5306"), val = tensor([1, 8, 1, 128])]; tensor var_5307 = reshape(shape = var_5306, x = var_5301)[name = string("op_5307")]; tensor mean_75_axes_0 = const()[name = string("mean_75_axes_0"), val = tensor([-1])]; bool mean_75_keep_dims_0 = const()[name = string("mean_75_keep_dims_0"), val = bool(true)]; tensor mean_75 = reduce_mean(axes = mean_75_axes_0, keep_dims = mean_75_keep_dims_0, x = var_5263)[name = string("mean_75")]; tensor input_167 = sub(x = var_5263, y = mean_75)[name = string("input_167")]; tensor var_5328_axes_0 = const()[name = string("op_5328_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896936576)))]; fp16 var_5316_to_fp16 = const()[name = string("op_5316_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5328_cast_fp16 = layer_norm(axes = var_5328_axes_0, epsilon = var_5316_to_fp16, gamma = model_model_layers_9_self_attn_q_norm_weight_to_fp16, x = input_167)[name = string("op_5328_cast_fp16")]; tensor mean_77_axes_0 = const()[name = string("mean_77_axes_0"), val = tensor([-1])]; bool mean_77_keep_dims_0 = const()[name = string("mean_77_keep_dims_0"), val = bool(true)]; tensor mean_77 = reduce_mean(axes = mean_77_axes_0, keep_dims = mean_77_keep_dims_0, x = var_5285)[name = string("mean_77")]; tensor input_169 = sub(x = var_5285, y = mean_77)[name = string("input_169")]; tensor var_5346_axes_0 = const()[name = string("op_5346_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896936896)))]; fp16 var_5334_to_fp16 = const()[name = string("op_5334_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5346_cast_fp16 = layer_norm(axes = var_5346_axes_0, epsilon = var_5334_to_fp16, gamma = model_model_layers_9_self_attn_k_norm_weight_to_fp16, x = input_169)[name = string("op_5346_cast_fp16")]; tensor var_5349 = mul(x = var_5328_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5349")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_5328_cast_fp16)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_5328_cast_fp16)[name = string("x2_37")]; fp16 const_167_promoted = const()[name = string("const_167_promoted"), val = fp16(-0x1p+0)]; tensor var_5370 = mul(x = x2_37, y = const_167_promoted)[name = string("op_5370")]; int32 var_5372 = const()[name = string("op_5372"), val = int32(-1)]; bool var_5373_interleave_0 = const()[name = string("op_5373_interleave_0"), val = bool(false)]; tensor var_5373 = concat(axis = var_5372, interleave = var_5373_interleave_0, values = (var_5370, x1_37))[name = string("op_5373")]; tensor var_5374 = mul(x = var_5373, y = sin_1_cast_fp16)[name = string("op_5374")]; tensor query_states_37 = add(x = var_5349, y = var_5374)[name = string("query_states_37")]; tensor var_5377 = mul(x = var_5346_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5377")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_5346_cast_fp16)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_5346_cast_fp16)[name = string("x2_39")]; fp16 const_170_promoted = const()[name = string("const_170_promoted"), val = fp16(-0x1p+0)]; tensor var_5398 = mul(x = x2_39, y = const_170_promoted)[name = string("op_5398")]; int32 var_5400 = const()[name = string("op_5400"), val = int32(-1)]; bool var_5401_interleave_0 = const()[name = string("op_5401_interleave_0"), val = bool(false)]; tensor var_5401 = concat(axis = var_5400, interleave = var_5401_interleave_0, values = (var_5398, x1_39))[name = string("op_5401")]; tensor var_5402 = mul(x = var_5401, y = sin_1_cast_fp16)[name = string("op_5402")]; tensor key_states_37 = add(x = var_5377, y = var_5402)[name = string("key_states_37")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([9])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([10])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_112, concat_75_values1_0, var_1195, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = key_states_37, x = coreml_update_state_53)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([45])]; tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([46])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_118, concat_79_values1_0, var_1195, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = var_5307, x = coreml_update_state_54)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; tensor var_5457_begin_0 = const()[name = string("op_5457_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_5457_end_0 = const()[name = string("op_5457_end_0"), val = tensor([10, 8, 1024, 128])]; tensor var_5457_end_mask_0 = const()[name = string("op_5457_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5457_cast_fp16 = slice_by_index(begin = var_5457_begin_0, end = var_5457_end_0, end_mask = var_5457_end_mask_0, x = coreml_update_state_55)[name = string("op_5457_cast_fp16")]; tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_5457_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; tensor var_5464_begin_0 = const()[name = string("op_5464_begin_0"), val = tensor([45, 0, 0, 0])]; tensor var_5464_end_0 = const()[name = string("op_5464_end_0"), val = tensor([46, 8, 1024, 128])]; tensor var_5464_end_mask_0 = const()[name = string("op_5464_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5464_cast_fp16 = slice_by_index(begin = var_5464_begin_0, end = var_5464_end_0, end_mask = var_5464_end_mask_0, x = coreml_update_state_55)[name = string("op_5464_cast_fp16")]; tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_5464_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; tensor x_187_axes_0 = const()[name = string("x_187_axes_0"), val = tensor([1])]; tensor x_187_cast_fp16 = expand_dims(axes = x_187_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_5501 = const()[name = string("op_5501"), val = tensor([1, 4, 1, 1])]; tensor x_189_cast_fp16 = tile(reps = var_5501, x = x_187_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_5513 = const()[name = string("op_5513"), val = tensor([1, -1, 1024, 128])]; tensor key_states_39_cast_fp16 = reshape(shape = var_5513, x = x_189_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor x_193_axes_0 = const()[name = string("x_193_axes_0"), val = tensor([1])]; tensor x_193_cast_fp16 = expand_dims(axes = x_193_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_5521 = const()[name = string("op_5521"), val = tensor([1, 4, 1, 1])]; tensor x_195_cast_fp16 = tile(reps = var_5521, x = x_193_cast_fp16)[name = string("x_195_cast_fp16")]; tensor var_5533 = const()[name = string("op_5533"), val = tensor([1, -1, 1024, 128])]; tensor value_states_57_cast_fp16 = reshape(shape = var_5533, x = x_195_cast_fp16)[name = string("value_states_57_cast_fp16")]; bool var_5548_transpose_x_1 = const()[name = string("op_5548_transpose_x_1"), val = bool(false)]; bool var_5548_transpose_y_1 = const()[name = string("op_5548_transpose_y_1"), val = bool(true)]; tensor var_5548 = matmul(transpose_x = var_5548_transpose_x_1, transpose_y = var_5548_transpose_y_1, x = query_states_37, y = key_states_39_cast_fp16)[name = string("op_5548")]; fp16 var_5549_to_fp16 = const()[name = string("op_5549_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_55_cast_fp16 = mul(x = var_5548, y = var_5549_to_fp16)[name = string("attn_weights_55_cast_fp16")]; tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = causal_mask)[name = string("attn_weights_57_cast_fp16")]; int32 var_5584 = const()[name = string("op_5584"), val = int32(-1)]; tensor attn_weights_59_cast_fp16 = softmax(axis = var_5584, x = attn_weights_57_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_59_cast_fp16, y = value_states_57_cast_fp16)[name = string("attn_output_91_cast_fp16")]; tensor var_5595_perm_0 = const()[name = string("op_5595_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5599 = const()[name = string("op_5599"), val = tensor([1, 1, 4096])]; tensor var_5595_cast_fp16 = transpose(perm = var_5595_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_52")]; tensor attn_output_95_cast_fp16 = reshape(shape = var_5599, x = var_5595_cast_fp16)[name = string("attn_output_95_cast_fp16")]; tensor var_5604 = const()[name = string("op_5604"), val = tensor([0, 2, 1])]; string var_5620_pad_type_0 = const()[name = string("op_5620_pad_type_0"), val = string("valid")]; int32 var_5620_groups_0 = const()[name = string("op_5620_groups_0"), val = int32(1)]; tensor var_5620_strides_0 = const()[name = string("op_5620_strides_0"), val = tensor([1])]; tensor var_5620_pad_0 = const()[name = string("op_5620_pad_0"), val = tensor([0, 0])]; tensor var_5620_dilations_0 = const()[name = string("op_5620_dilations_0"), val = tensor([1])]; tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896937216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902180160))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5605_cast_fp16 = transpose(perm = var_5604, x = attn_output_95_cast_fp16)[name = string("transpose_51")]; tensor var_5620_cast_fp16 = conv(dilations = var_5620_dilations_0, groups = var_5620_groups_0, pad = var_5620_pad_0, pad_type = var_5620_pad_type_0, strides = var_5620_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_5605_cast_fp16)[name = string("op_5620_cast_fp16")]; tensor var_5624 = const()[name = string("op_5624"), val = tensor([0, 2, 1])]; tensor attn_output_99_cast_fp16 = transpose(perm = var_5624, x = var_5620_cast_fp16)[name = string("transpose_50")]; tensor hidden_states_59_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor mean_79_axes_0 = const()[name = string("mean_79_axes_0"), val = tensor([-1])]; bool mean_79_keep_dims_0 = const()[name = string("mean_79_keep_dims_0"), val = bool(true)]; tensor mean_79_cast_fp16 = reduce_mean(axes = mean_79_axes_0, keep_dims = mean_79_keep_dims_0, x = hidden_states_59_cast_fp16)[name = string("mean_79_cast_fp16")]; tensor input_173_cast_fp16 = sub(x = hidden_states_59_cast_fp16, y = mean_79_cast_fp16)[name = string("input_173_cast_fp16")]; tensor var_5643_axes_0 = const()[name = string("op_5643_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902190464)))]; fp16 var_5631_to_fp16 = const()[name = string("op_5631_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5643_cast_fp16 = layer_norm(axes = var_5643_axes_0, epsilon = var_5631_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_173_cast_fp16)[name = string("op_5643_cast_fp16")]; tensor var_5657 = const()[name = string("op_5657"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_5658 = transpose(perm = var_5657, x = var_5643_cast_fp16)[name = string("transpose_49")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_5658)[name = string("input_175")]; string input_177_pad_type_0 = const()[name = string("input_177_pad_type_0"), val = string("valid")]; tensor input_177_strides_0 = const()[name = string("input_177_strides_0"), val = tensor([1, 1])]; tensor input_177_pad_0 = const()[name = string("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_177_dilations_0 = const()[name = string("input_177_dilations_0"), val = tensor([1, 1])]; int32 input_177_groups_0 = const()[name = string("input_177_groups_0"), val = int32(1)]; tensor input_177 = conv(dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_175)[name = string("input_177")]; string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_175)[name = string("b_19")]; tensor c_19 = silu(x = input_177)[name = string("c_19")]; tensor input_179 = mul(x = c_19, y = b_19)[name = string("input_179")]; string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; tensor e_19 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_179)[name = string("e_19")]; tensor var_5680_axes_0 = const()[name = string("op_5680_axes_0"), val = tensor([2])]; tensor var_5680 = squeeze(axes = var_5680_axes_0, x = e_19)[name = string("op_5680")]; tensor var_5681 = const()[name = string("op_5681"), val = tensor([0, 2, 1])]; tensor var_5682 = transpose(perm = var_5681, x = var_5680)[name = string("transpose_48")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = var_5682)[name = string("hidden_states_61_cast_fp16")]; tensor mean_81_axes_0 = const()[name = string("mean_81_axes_0"), val = tensor([-1])]; bool mean_81_keep_dims_0 = const()[name = string("mean_81_keep_dims_0"), val = bool(true)]; tensor mean_81_cast_fp16 = reduce_mean(axes = mean_81_axes_0, keep_dims = mean_81_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_81_cast_fp16")]; tensor input_181_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_81_cast_fp16)[name = string("input_181_cast_fp16")]; tensor var_5700_axes_0 = const()[name = string("op_5700_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902195648)))]; fp16 var_5688_to_fp16 = const()[name = string("op_5688_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5700_cast_fp16 = layer_norm(axes = var_5700_axes_0, epsilon = var_5688_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_181_cast_fp16)[name = string("op_5700_cast_fp16")]; tensor var_5706 = const()[name = string("op_5706"), val = tensor([0, 2, 1])]; tensor var_5709_axes_0 = const()[name = string("op_5709_axes_0"), val = tensor([2])]; tensor var_5707 = transpose(perm = var_5706, x = var_5700_cast_fp16)[name = string("transpose_47")]; tensor var_5709 = expand_dims(axes = var_5709_axes_0, x = var_5707)[name = string("op_5709")]; string var_5725_pad_type_0 = const()[name = string("op_5725_pad_type_0"), val = string("valid")]; tensor var_5725_strides_0 = const()[name = string("op_5725_strides_0"), val = tensor([1, 1])]; tensor var_5725_pad_0 = const()[name = string("op_5725_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5725_dilations_0 = const()[name = string("op_5725_dilations_0"), val = tensor([1, 1])]; int32 var_5725_groups_0 = const()[name = string("op_5725_groups_0"), val = int32(1)]; tensor var_5725 = conv(dilations = var_5725_dilations_0, groups = var_5725_groups_0, pad = var_5725_pad_0, pad_type = var_5725_pad_type_0, strides = var_5725_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_5709)[name = string("op_5725")]; tensor var_5730 = const()[name = string("op_5730"), val = tensor([1, 32, 1, 128])]; tensor var_5731 = reshape(shape = var_5730, x = var_5725)[name = string("op_5731")]; string var_5747_pad_type_0 = const()[name = string("op_5747_pad_type_0"), val = string("valid")]; tensor var_5747_strides_0 = const()[name = string("op_5747_strides_0"), val = tensor([1, 1])]; tensor var_5747_pad_0 = const()[name = string("op_5747_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5747_dilations_0 = const()[name = string("op_5747_dilations_0"), val = tensor([1, 1])]; int32 var_5747_groups_0 = const()[name = string("op_5747_groups_0"), val = int32(1)]; tensor var_5747 = conv(dilations = var_5747_dilations_0, groups = var_5747_groups_0, pad = var_5747_pad_0, pad_type = var_5747_pad_type_0, strides = var_5747_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_5709)[name = string("op_5747")]; tensor var_5752 = const()[name = string("op_5752"), val = tensor([1, 8, 1, 128])]; tensor var_5753 = reshape(shape = var_5752, x = var_5747)[name = string("op_5753")]; string var_5769_pad_type_0 = const()[name = string("op_5769_pad_type_0"), val = string("valid")]; tensor var_5769_strides_0 = const()[name = string("op_5769_strides_0"), val = tensor([1, 1])]; tensor var_5769_pad_0 = const()[name = string("op_5769_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5769_dilations_0 = const()[name = string("op_5769_dilations_0"), val = tensor([1, 1])]; int32 var_5769_groups_0 = const()[name = string("op_5769_groups_0"), val = int32(1)]; tensor var_5769 = conv(dilations = var_5769_dilations_0, groups = var_5769_groups_0, pad = var_5769_pad_0, pad_type = var_5769_pad_type_0, strides = var_5769_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_5709)[name = string("op_5769")]; tensor var_5774 = const()[name = string("op_5774"), val = tensor([1, 8, 1, 128])]; tensor var_5775 = reshape(shape = var_5774, x = var_5769)[name = string("op_5775")]; tensor mean_83_axes_0 = const()[name = string("mean_83_axes_0"), val = tensor([-1])]; bool mean_83_keep_dims_0 = const()[name = string("mean_83_keep_dims_0"), val = bool(true)]; tensor mean_83 = reduce_mean(axes = mean_83_axes_0, keep_dims = mean_83_keep_dims_0, x = var_5731)[name = string("mean_83")]; tensor input_185 = sub(x = var_5731, y = mean_83)[name = string("input_185")]; tensor var_5796_axes_0 = const()[name = string("op_5796_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902200832)))]; fp16 var_5784_to_fp16 = const()[name = string("op_5784_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5796_cast_fp16 = layer_norm(axes = var_5796_axes_0, epsilon = var_5784_to_fp16, gamma = model_model_layers_10_self_attn_q_norm_weight_to_fp16, x = input_185)[name = string("op_5796_cast_fp16")]; tensor mean_85_axes_0 = const()[name = string("mean_85_axes_0"), val = tensor([-1])]; bool mean_85_keep_dims_0 = const()[name = string("mean_85_keep_dims_0"), val = bool(true)]; tensor mean_85 = reduce_mean(axes = mean_85_axes_0, keep_dims = mean_85_keep_dims_0, x = var_5753)[name = string("mean_85")]; tensor input_187 = sub(x = var_5753, y = mean_85)[name = string("input_187")]; tensor var_5814_axes_0 = const()[name = string("op_5814_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902201152)))]; fp16 var_5802_to_fp16 = const()[name = string("op_5802_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5814_cast_fp16 = layer_norm(axes = var_5814_axes_0, epsilon = var_5802_to_fp16, gamma = model_model_layers_10_self_attn_k_norm_weight_to_fp16, x = input_187)[name = string("op_5814_cast_fp16")]; tensor var_5817 = mul(x = var_5796_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5817")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_5796_cast_fp16)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_5796_cast_fp16)[name = string("x2_41")]; fp16 const_185_promoted = const()[name = string("const_185_promoted"), val = fp16(-0x1p+0)]; tensor var_5838 = mul(x = x2_41, y = const_185_promoted)[name = string("op_5838")]; int32 var_5840 = const()[name = string("op_5840"), val = int32(-1)]; bool var_5841_interleave_0 = const()[name = string("op_5841_interleave_0"), val = bool(false)]; tensor var_5841 = concat(axis = var_5840, interleave = var_5841_interleave_0, values = (var_5838, x1_41))[name = string("op_5841")]; tensor var_5842 = mul(x = var_5841, y = sin_1_cast_fp16)[name = string("op_5842")]; tensor query_states_41 = add(x = var_5817, y = var_5842)[name = string("query_states_41")]; tensor var_5845 = mul(x = var_5814_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5845")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_5814_cast_fp16)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_5814_cast_fp16)[name = string("x2_43")]; fp16 const_188_promoted = const()[name = string("const_188_promoted"), val = fp16(-0x1p+0)]; tensor var_5866 = mul(x = x2_43, y = const_188_promoted)[name = string("op_5866")]; int32 var_5868 = const()[name = string("op_5868"), val = int32(-1)]; bool var_5869_interleave_0 = const()[name = string("op_5869_interleave_0"), val = bool(false)]; tensor var_5869 = concat(axis = var_5868, interleave = var_5869_interleave_0, values = (var_5866, x1_43))[name = string("op_5869")]; tensor var_5870 = mul(x = var_5869, y = sin_1_cast_fp16)[name = string("op_5870")]; tensor key_states_41 = add(x = var_5845, y = var_5870)[name = string("key_states_41")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([10])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([11])]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_82")]; tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_124, concat_83_values1_0, var_1195, concat_83_values3_0))[name = string("concat_83")]; tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = key_states_41, x = coreml_update_state_55)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([46])]; tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([47])]; int32 concat_86_axis_0 = const()[name = string("concat_86_axis_0"), val = int32(0)]; bool concat_86_interleave_0 = const()[name = string("concat_86_interleave_0"), val = bool(false)]; tensor concat_86 = concat(axis = concat_86_axis_0, interleave = concat_86_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_86")]; tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (expand_dims_130, concat_87_values1_0, var_1195, concat_87_values3_0))[name = string("concat_87")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = var_5775, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; tensor var_5925_begin_0 = const()[name = string("op_5925_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_5925_end_0 = const()[name = string("op_5925_end_0"), val = tensor([11, 8, 1024, 128])]; tensor var_5925_end_mask_0 = const()[name = string("op_5925_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5925_cast_fp16 = slice_by_index(begin = var_5925_begin_0, end = var_5925_end_0, end_mask = var_5925_end_mask_0, x = coreml_update_state_57)[name = string("op_5925_cast_fp16")]; tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_5925_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; tensor var_5932_begin_0 = const()[name = string("op_5932_begin_0"), val = tensor([46, 0, 0, 0])]; tensor var_5932_end_0 = const()[name = string("op_5932_end_0"), val = tensor([47, 8, 1024, 128])]; tensor var_5932_end_mask_0 = const()[name = string("op_5932_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5932_cast_fp16 = slice_by_index(begin = var_5932_begin_0, end = var_5932_end_0, end_mask = var_5932_end_mask_0, x = coreml_update_state_57)[name = string("op_5932_cast_fp16")]; tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_5932_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_5969 = const()[name = string("op_5969"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_5969, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_5981 = const()[name = string("op_5981"), val = tensor([1, -1, 1024, 128])]; tensor key_states_43_cast_fp16 = reshape(shape = var_5981, x = x_209_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_5989 = const()[name = string("op_5989"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_5989, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; tensor var_6001 = const()[name = string("op_6001"), val = tensor([1, -1, 1024, 128])]; tensor value_states_63_cast_fp16 = reshape(shape = var_6001, x = x_215_cast_fp16)[name = string("value_states_63_cast_fp16")]; bool var_6016_transpose_x_1 = const()[name = string("op_6016_transpose_x_1"), val = bool(false)]; bool var_6016_transpose_y_1 = const()[name = string("op_6016_transpose_y_1"), val = bool(true)]; tensor var_6016 = matmul(transpose_x = var_6016_transpose_x_1, transpose_y = var_6016_transpose_y_1, x = query_states_41, y = key_states_43_cast_fp16)[name = string("op_6016")]; fp16 var_6017_to_fp16 = const()[name = string("op_6017_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_6016, y = var_6017_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_6052 = const()[name = string("op_6052"), val = int32(-1)]; tensor attn_weights_65_cast_fp16 = softmax(axis = var_6052, x = attn_weights_63_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = attn_weights_65_cast_fp16, y = value_states_63_cast_fp16)[name = string("attn_output_101_cast_fp16")]; tensor var_6063_perm_0 = const()[name = string("op_6063_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6067 = const()[name = string("op_6067"), val = tensor([1, 1, 4096])]; tensor var_6063_cast_fp16 = transpose(perm = var_6063_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_46")]; tensor attn_output_105_cast_fp16 = reshape(shape = var_6067, x = var_6063_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_6072 = const()[name = string("op_6072"), val = tensor([0, 2, 1])]; string var_6088_pad_type_0 = const()[name = string("op_6088_pad_type_0"), val = string("valid")]; int32 var_6088_groups_0 = const()[name = string("op_6088_groups_0"), val = int32(1)]; tensor var_6088_strides_0 = const()[name = string("op_6088_strides_0"), val = tensor([1])]; tensor var_6088_pad_0 = const()[name = string("op_6088_pad_0"), val = tensor([0, 0])]; tensor var_6088_dilations_0 = const()[name = string("op_6088_dilations_0"), val = tensor([1])]; tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902201472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907444416))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6073_cast_fp16 = transpose(perm = var_6072, x = attn_output_105_cast_fp16)[name = string("transpose_45")]; tensor var_6088_cast_fp16 = conv(dilations = var_6088_dilations_0, groups = var_6088_groups_0, pad = var_6088_pad_0, pad_type = var_6088_pad_type_0, strides = var_6088_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_6073_cast_fp16)[name = string("op_6088_cast_fp16")]; tensor var_6092 = const()[name = string("op_6092"), val = tensor([0, 2, 1])]; tensor attn_output_109_cast_fp16 = transpose(perm = var_6092, x = var_6088_cast_fp16)[name = string("transpose_44")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor mean_87_axes_0 = const()[name = string("mean_87_axes_0"), val = tensor([-1])]; bool mean_87_keep_dims_0 = const()[name = string("mean_87_keep_dims_0"), val = bool(true)]; tensor mean_87_cast_fp16 = reduce_mean(axes = mean_87_axes_0, keep_dims = mean_87_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_87_cast_fp16")]; tensor input_191_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_87_cast_fp16)[name = string("input_191_cast_fp16")]; tensor var_6111_axes_0 = const()[name = string("op_6111_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907454720)))]; fp16 var_6099_to_fp16 = const()[name = string("op_6099_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6111_cast_fp16 = layer_norm(axes = var_6111_axes_0, epsilon = var_6099_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_191_cast_fp16)[name = string("op_6111_cast_fp16")]; tensor var_6125 = const()[name = string("op_6125"), val = tensor([0, 2, 1])]; tensor input_193_axes_0 = const()[name = string("input_193_axes_0"), val = tensor([2])]; tensor var_6126 = transpose(perm = var_6125, x = var_6111_cast_fp16)[name = string("transpose_43")]; tensor input_193 = expand_dims(axes = input_193_axes_0, x = var_6126)[name = string("input_193")]; string input_195_pad_type_0 = const()[name = string("input_195_pad_type_0"), val = string("valid")]; tensor input_195_strides_0 = const()[name = string("input_195_strides_0"), val = tensor([1, 1])]; tensor input_195_pad_0 = const()[name = string("input_195_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_195_dilations_0 = const()[name = string("input_195_dilations_0"), val = tensor([1, 1])]; int32 input_195_groups_0 = const()[name = string("input_195_groups_0"), val = int32(1)]; tensor input_195 = conv(dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_193)[name = string("input_195")]; string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_193)[name = string("b_21")]; tensor c_21 = silu(x = input_195)[name = string("c_21")]; tensor input_197 = mul(x = c_21, y = b_21)[name = string("input_197")]; string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; tensor e_21 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_197)[name = string("e_21")]; tensor var_6148_axes_0 = const()[name = string("op_6148_axes_0"), val = tensor([2])]; tensor var_6148 = squeeze(axes = var_6148_axes_0, x = e_21)[name = string("op_6148")]; tensor var_6149 = const()[name = string("op_6149"), val = tensor([0, 2, 1])]; tensor var_6150 = transpose(perm = var_6149, x = var_6148)[name = string("transpose_42")]; tensor hidden_states_67_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = var_6150)[name = string("hidden_states_67_cast_fp16")]; tensor mean_89_axes_0 = const()[name = string("mean_89_axes_0"), val = tensor([-1])]; bool mean_89_keep_dims_0 = const()[name = string("mean_89_keep_dims_0"), val = bool(true)]; tensor mean_89_cast_fp16 = reduce_mean(axes = mean_89_axes_0, keep_dims = mean_89_keep_dims_0, x = hidden_states_67_cast_fp16)[name = string("mean_89_cast_fp16")]; tensor input_199_cast_fp16 = sub(x = hidden_states_67_cast_fp16, y = mean_89_cast_fp16)[name = string("input_199_cast_fp16")]; tensor var_6168_axes_0 = const()[name = string("op_6168_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907459904)))]; fp16 var_6156_to_fp16 = const()[name = string("op_6156_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6168_cast_fp16 = layer_norm(axes = var_6168_axes_0, epsilon = var_6156_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_199_cast_fp16)[name = string("op_6168_cast_fp16")]; tensor var_6174 = const()[name = string("op_6174"), val = tensor([0, 2, 1])]; tensor var_6177_axes_0 = const()[name = string("op_6177_axes_0"), val = tensor([2])]; tensor var_6175 = transpose(perm = var_6174, x = var_6168_cast_fp16)[name = string("transpose_41")]; tensor var_6177 = expand_dims(axes = var_6177_axes_0, x = var_6175)[name = string("op_6177")]; string var_6193_pad_type_0 = const()[name = string("op_6193_pad_type_0"), val = string("valid")]; tensor var_6193_strides_0 = const()[name = string("op_6193_strides_0"), val = tensor([1, 1])]; tensor var_6193_pad_0 = const()[name = string("op_6193_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6193_dilations_0 = const()[name = string("op_6193_dilations_0"), val = tensor([1, 1])]; int32 var_6193_groups_0 = const()[name = string("op_6193_groups_0"), val = int32(1)]; tensor var_6193 = conv(dilations = var_6193_dilations_0, groups = var_6193_groups_0, pad = var_6193_pad_0, pad_type = var_6193_pad_type_0, strides = var_6193_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_6177)[name = string("op_6193")]; tensor var_6198 = const()[name = string("op_6198"), val = tensor([1, 32, 1, 128])]; tensor var_6199 = reshape(shape = var_6198, x = var_6193)[name = string("op_6199")]; string var_6215_pad_type_0 = const()[name = string("op_6215_pad_type_0"), val = string("valid")]; tensor var_6215_strides_0 = const()[name = string("op_6215_strides_0"), val = tensor([1, 1])]; tensor var_6215_pad_0 = const()[name = string("op_6215_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6215_dilations_0 = const()[name = string("op_6215_dilations_0"), val = tensor([1, 1])]; int32 var_6215_groups_0 = const()[name = string("op_6215_groups_0"), val = int32(1)]; tensor var_6215 = conv(dilations = var_6215_dilations_0, groups = var_6215_groups_0, pad = var_6215_pad_0, pad_type = var_6215_pad_type_0, strides = var_6215_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_6177)[name = string("op_6215")]; tensor var_6220 = const()[name = string("op_6220"), val = tensor([1, 8, 1, 128])]; tensor var_6221 = reshape(shape = var_6220, x = var_6215)[name = string("op_6221")]; string var_6237_pad_type_0 = const()[name = string("op_6237_pad_type_0"), val = string("valid")]; tensor var_6237_strides_0 = const()[name = string("op_6237_strides_0"), val = tensor([1, 1])]; tensor var_6237_pad_0 = const()[name = string("op_6237_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6237_dilations_0 = const()[name = string("op_6237_dilations_0"), val = tensor([1, 1])]; int32 var_6237_groups_0 = const()[name = string("op_6237_groups_0"), val = int32(1)]; tensor var_6237 = conv(dilations = var_6237_dilations_0, groups = var_6237_groups_0, pad = var_6237_pad_0, pad_type = var_6237_pad_type_0, strides = var_6237_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_6177)[name = string("op_6237")]; tensor var_6242 = const()[name = string("op_6242"), val = tensor([1, 8, 1, 128])]; tensor var_6243 = reshape(shape = var_6242, x = var_6237)[name = string("op_6243")]; tensor mean_91_axes_0 = const()[name = string("mean_91_axes_0"), val = tensor([-1])]; bool mean_91_keep_dims_0 = const()[name = string("mean_91_keep_dims_0"), val = bool(true)]; tensor mean_91 = reduce_mean(axes = mean_91_axes_0, keep_dims = mean_91_keep_dims_0, x = var_6199)[name = string("mean_91")]; tensor input_203 = sub(x = var_6199, y = mean_91)[name = string("input_203")]; tensor var_6264_axes_0 = const()[name = string("op_6264_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465088)))]; fp16 var_6252_to_fp16 = const()[name = string("op_6252_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6264_cast_fp16 = layer_norm(axes = var_6264_axes_0, epsilon = var_6252_to_fp16, gamma = model_model_layers_11_self_attn_q_norm_weight_to_fp16, x = input_203)[name = string("op_6264_cast_fp16")]; tensor mean_93_axes_0 = const()[name = string("mean_93_axes_0"), val = tensor([-1])]; bool mean_93_keep_dims_0 = const()[name = string("mean_93_keep_dims_0"), val = bool(true)]; tensor mean_93 = reduce_mean(axes = mean_93_axes_0, keep_dims = mean_93_keep_dims_0, x = var_6221)[name = string("mean_93")]; tensor input_205 = sub(x = var_6221, y = mean_93)[name = string("input_205")]; tensor var_6282_axes_0 = const()[name = string("op_6282_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465408)))]; fp16 var_6270_to_fp16 = const()[name = string("op_6270_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6282_cast_fp16 = layer_norm(axes = var_6282_axes_0, epsilon = var_6270_to_fp16, gamma = model_model_layers_11_self_attn_k_norm_weight_to_fp16, x = input_205)[name = string("op_6282_cast_fp16")]; tensor var_6285 = mul(x = var_6264_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6285")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_6264_cast_fp16)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_6264_cast_fp16)[name = string("x2_45")]; fp16 const_203_promoted = const()[name = string("const_203_promoted"), val = fp16(-0x1p+0)]; tensor var_6306 = mul(x = x2_45, y = const_203_promoted)[name = string("op_6306")]; int32 var_6308 = const()[name = string("op_6308"), val = int32(-1)]; bool var_6309_interleave_0 = const()[name = string("op_6309_interleave_0"), val = bool(false)]; tensor var_6309 = concat(axis = var_6308, interleave = var_6309_interleave_0, values = (var_6306, x1_45))[name = string("op_6309")]; tensor var_6310 = mul(x = var_6309, y = sin_1_cast_fp16)[name = string("op_6310")]; tensor query_states_45 = add(x = var_6285, y = var_6310)[name = string("query_states_45")]; tensor var_6313 = mul(x = var_6282_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6313")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_6282_cast_fp16)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_6282_cast_fp16)[name = string("x2_47")]; fp16 const_206_promoted = const()[name = string("const_206_promoted"), val = fp16(-0x1p+0)]; tensor var_6334 = mul(x = x2_47, y = const_206_promoted)[name = string("op_6334")]; int32 var_6336 = const()[name = string("op_6336"), val = int32(-1)]; bool var_6337_interleave_0 = const()[name = string("op_6337_interleave_0"), val = bool(false)]; tensor var_6337 = concat(axis = var_6336, interleave = var_6337_interleave_0, values = (var_6334, x1_47))[name = string("op_6337")]; tensor var_6338 = mul(x = var_6337, y = sin_1_cast_fp16)[name = string("op_6338")]; tensor key_states_45 = add(x = var_6313, y = var_6338)[name = string("key_states_45")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([11])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([12])]; int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_90")]; tensor concat_91_values1_0 = const()[name = string("concat_91_values1_0"), val = tensor([0])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_136, concat_91_values1_0, var_1195, concat_91_values3_0))[name = string("concat_91")]; tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_90, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_91, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = key_states_45, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([47])]; tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([48])]; int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_94")]; tensor concat_95_values1_0 = const()[name = string("concat_95_values1_0"), val = tensor([0])]; tensor concat_95_values3_0 = const()[name = string("concat_95_values3_0"), val = tensor([0])]; int32 concat_95_axis_0 = const()[name = string("concat_95_axis_0"), val = int32(0)]; bool concat_95_interleave_0 = const()[name = string("concat_95_interleave_0"), val = bool(false)]; tensor concat_95 = concat(axis = concat_95_axis_0, interleave = concat_95_interleave_0, values = (expand_dims_142, concat_95_values1_0, var_1195, concat_95_values3_0))[name = string("concat_95")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_94, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_95, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = var_6243, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; tensor var_6393_begin_0 = const()[name = string("op_6393_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_6393_end_0 = const()[name = string("op_6393_end_0"), val = tensor([12, 8, 1024, 128])]; tensor var_6393_end_mask_0 = const()[name = string("op_6393_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6393_cast_fp16 = slice_by_index(begin = var_6393_begin_0, end = var_6393_end_0, end_mask = var_6393_end_mask_0, x = coreml_update_state_59)[name = string("op_6393_cast_fp16")]; tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_6393_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; tensor var_6400_begin_0 = const()[name = string("op_6400_begin_0"), val = tensor([47, 0, 0, 0])]; tensor var_6400_end_0 = const()[name = string("op_6400_end_0"), val = tensor([48, 8, 1024, 128])]; tensor var_6400_end_mask_0 = const()[name = string("op_6400_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6400_cast_fp16 = slice_by_index(begin = var_6400_begin_0, end = var_6400_end_0, end_mask = var_6400_end_mask_0, x = coreml_update_state_59)[name = string("op_6400_cast_fp16")]; tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_6400_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; tensor x_227_axes_0 = const()[name = string("x_227_axes_0"), val = tensor([1])]; tensor x_227_cast_fp16 = expand_dims(axes = x_227_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_227_cast_fp16")]; tensor var_6437 = const()[name = string("op_6437"), val = tensor([1, 4, 1, 1])]; tensor x_229_cast_fp16 = tile(reps = var_6437, x = x_227_cast_fp16)[name = string("x_229_cast_fp16")]; tensor var_6449 = const()[name = string("op_6449"), val = tensor([1, -1, 1024, 128])]; tensor key_states_47_cast_fp16 = reshape(shape = var_6449, x = x_229_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor x_233_axes_0 = const()[name = string("x_233_axes_0"), val = tensor([1])]; tensor x_233_cast_fp16 = expand_dims(axes = x_233_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_233_cast_fp16")]; tensor var_6457 = const()[name = string("op_6457"), val = tensor([1, 4, 1, 1])]; tensor x_235_cast_fp16 = tile(reps = var_6457, x = x_233_cast_fp16)[name = string("x_235_cast_fp16")]; tensor var_6469 = const()[name = string("op_6469"), val = tensor([1, -1, 1024, 128])]; tensor value_states_69_cast_fp16 = reshape(shape = var_6469, x = x_235_cast_fp16)[name = string("value_states_69_cast_fp16")]; bool var_6484_transpose_x_1 = const()[name = string("op_6484_transpose_x_1"), val = bool(false)]; bool var_6484_transpose_y_1 = const()[name = string("op_6484_transpose_y_1"), val = bool(true)]; tensor var_6484 = matmul(transpose_x = var_6484_transpose_x_1, transpose_y = var_6484_transpose_y_1, x = query_states_45, y = key_states_47_cast_fp16)[name = string("op_6484")]; fp16 var_6485_to_fp16 = const()[name = string("op_6485_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_67_cast_fp16 = mul(x = var_6484, y = var_6485_to_fp16)[name = string("attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; int32 var_6520 = const()[name = string("op_6520"), val = int32(-1)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_6520, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool attn_output_111_transpose_x_0 = const()[name = string("attn_output_111_transpose_x_0"), val = bool(false)]; bool attn_output_111_transpose_y_0 = const()[name = string("attn_output_111_transpose_y_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = matmul(transpose_x = attn_output_111_transpose_x_0, transpose_y = attn_output_111_transpose_y_0, x = attn_weights_71_cast_fp16, y = value_states_69_cast_fp16)[name = string("attn_output_111_cast_fp16")]; tensor var_6531_perm_0 = const()[name = string("op_6531_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6535 = const()[name = string("op_6535"), val = tensor([1, 1, 4096])]; tensor var_6531_cast_fp16 = transpose(perm = var_6531_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_40")]; tensor attn_output_115_cast_fp16 = reshape(shape = var_6535, x = var_6531_cast_fp16)[name = string("attn_output_115_cast_fp16")]; tensor var_6540 = const()[name = string("op_6540"), val = tensor([0, 2, 1])]; string var_6556_pad_type_0 = const()[name = string("op_6556_pad_type_0"), val = string("valid")]; int32 var_6556_groups_0 = const()[name = string("op_6556_groups_0"), val = int32(1)]; tensor var_6556_strides_0 = const()[name = string("op_6556_strides_0"), val = tensor([1])]; tensor var_6556_pad_0 = const()[name = string("op_6556_pad_0"), val = tensor([0, 0])]; tensor var_6556_dilations_0 = const()[name = string("op_6556_dilations_0"), val = tensor([1])]; tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912708672))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6541_cast_fp16 = transpose(perm = var_6540, x = attn_output_115_cast_fp16)[name = string("transpose_39")]; tensor var_6556_cast_fp16 = conv(dilations = var_6556_dilations_0, groups = var_6556_groups_0, pad = var_6556_pad_0, pad_type = var_6556_pad_type_0, strides = var_6556_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_6541_cast_fp16)[name = string("op_6556_cast_fp16")]; tensor var_6560 = const()[name = string("op_6560"), val = tensor([0, 2, 1])]; tensor attn_output_119_cast_fp16 = transpose(perm = var_6560, x = var_6556_cast_fp16)[name = string("transpose_38")]; tensor hidden_states_71_cast_fp16 = add(x = hidden_states_67_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor mean_95_axes_0 = const()[name = string("mean_95_axes_0"), val = tensor([-1])]; bool mean_95_keep_dims_0 = const()[name = string("mean_95_keep_dims_0"), val = bool(true)]; tensor mean_95_cast_fp16 = reduce_mean(axes = mean_95_axes_0, keep_dims = mean_95_keep_dims_0, x = hidden_states_71_cast_fp16)[name = string("mean_95_cast_fp16")]; tensor input_209_cast_fp16 = sub(x = hidden_states_71_cast_fp16, y = mean_95_cast_fp16)[name = string("input_209_cast_fp16")]; tensor var_6579_axes_0 = const()[name = string("op_6579_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912718976)))]; fp16 var_6567_to_fp16 = const()[name = string("op_6567_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6579_cast_fp16 = layer_norm(axes = var_6579_axes_0, epsilon = var_6567_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_209_cast_fp16)[name = string("op_6579_cast_fp16")]; tensor var_6593 = const()[name = string("op_6593"), val = tensor([0, 2, 1])]; tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; tensor var_6594 = transpose(perm = var_6593, x = var_6579_cast_fp16)[name = string("transpose_37")]; tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_6594)[name = string("input_211")]; string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_211)[name = string("input_213")]; string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_211)[name = string("b_23")]; tensor c_23 = silu(x = input_213)[name = string("c_23")]; tensor input_215 = mul(x = c_23, y = b_23)[name = string("input_215")]; string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; tensor e_23 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_215)[name = string("e_23")]; tensor var_6616_axes_0 = const()[name = string("op_6616_axes_0"), val = tensor([2])]; tensor var_6616 = squeeze(axes = var_6616_axes_0, x = e_23)[name = string("op_6616")]; tensor var_6617 = const()[name = string("op_6617"), val = tensor([0, 2, 1])]; tensor var_6618 = transpose(perm = var_6617, x = var_6616)[name = string("transpose_36")]; tensor hidden_states_73_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = var_6618)[name = string("hidden_states_73_cast_fp16")]; tensor mean_97_axes_0 = const()[name = string("mean_97_axes_0"), val = tensor([-1])]; bool mean_97_keep_dims_0 = const()[name = string("mean_97_keep_dims_0"), val = bool(true)]; tensor mean_97_cast_fp16 = reduce_mean(axes = mean_97_axes_0, keep_dims = mean_97_keep_dims_0, x = hidden_states_73_cast_fp16)[name = string("mean_97_cast_fp16")]; tensor input_217_cast_fp16 = sub(x = hidden_states_73_cast_fp16, y = mean_97_cast_fp16)[name = string("input_217_cast_fp16")]; tensor var_6636_axes_0 = const()[name = string("op_6636_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912724160)))]; fp16 var_6624_to_fp16 = const()[name = string("op_6624_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6636_cast_fp16 = layer_norm(axes = var_6636_axes_0, epsilon = var_6624_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_217_cast_fp16)[name = string("op_6636_cast_fp16")]; tensor var_6642 = const()[name = string("op_6642"), val = tensor([0, 2, 1])]; tensor var_6645_axes_0 = const()[name = string("op_6645_axes_0"), val = tensor([2])]; tensor var_6643 = transpose(perm = var_6642, x = var_6636_cast_fp16)[name = string("transpose_35")]; tensor var_6645 = expand_dims(axes = var_6645_axes_0, x = var_6643)[name = string("op_6645")]; string var_6661_pad_type_0 = const()[name = string("op_6661_pad_type_0"), val = string("valid")]; tensor var_6661_strides_0 = const()[name = string("op_6661_strides_0"), val = tensor([1, 1])]; tensor var_6661_pad_0 = const()[name = string("op_6661_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6661_dilations_0 = const()[name = string("op_6661_dilations_0"), val = tensor([1, 1])]; int32 var_6661_groups_0 = const()[name = string("op_6661_groups_0"), val = int32(1)]; tensor var_6661 = conv(dilations = var_6661_dilations_0, groups = var_6661_groups_0, pad = var_6661_pad_0, pad_type = var_6661_pad_type_0, strides = var_6661_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_6645)[name = string("op_6661")]; tensor var_6666 = const()[name = string("op_6666"), val = tensor([1, 32, 1, 128])]; tensor var_6667 = reshape(shape = var_6666, x = var_6661)[name = string("op_6667")]; string var_6683_pad_type_0 = const()[name = string("op_6683_pad_type_0"), val = string("valid")]; tensor var_6683_strides_0 = const()[name = string("op_6683_strides_0"), val = tensor([1, 1])]; tensor var_6683_pad_0 = const()[name = string("op_6683_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6683_dilations_0 = const()[name = string("op_6683_dilations_0"), val = tensor([1, 1])]; int32 var_6683_groups_0 = const()[name = string("op_6683_groups_0"), val = int32(1)]; tensor var_6683 = conv(dilations = var_6683_dilations_0, groups = var_6683_groups_0, pad = var_6683_pad_0, pad_type = var_6683_pad_type_0, strides = var_6683_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_6645)[name = string("op_6683")]; tensor var_6688 = const()[name = string("op_6688"), val = tensor([1, 8, 1, 128])]; tensor var_6689 = reshape(shape = var_6688, x = var_6683)[name = string("op_6689")]; string var_6705_pad_type_0 = const()[name = string("op_6705_pad_type_0"), val = string("valid")]; tensor var_6705_strides_0 = const()[name = string("op_6705_strides_0"), val = tensor([1, 1])]; tensor var_6705_pad_0 = const()[name = string("op_6705_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6705_dilations_0 = const()[name = string("op_6705_dilations_0"), val = tensor([1, 1])]; int32 var_6705_groups_0 = const()[name = string("op_6705_groups_0"), val = int32(1)]; tensor var_6705 = conv(dilations = var_6705_dilations_0, groups = var_6705_groups_0, pad = var_6705_pad_0, pad_type = var_6705_pad_type_0, strides = var_6705_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_6645)[name = string("op_6705")]; tensor var_6710 = const()[name = string("op_6710"), val = tensor([1, 8, 1, 128])]; tensor var_6711 = reshape(shape = var_6710, x = var_6705)[name = string("op_6711")]; tensor mean_99_axes_0 = const()[name = string("mean_99_axes_0"), val = tensor([-1])]; bool mean_99_keep_dims_0 = const()[name = string("mean_99_keep_dims_0"), val = bool(true)]; tensor mean_99 = reduce_mean(axes = mean_99_axes_0, keep_dims = mean_99_keep_dims_0, x = var_6667)[name = string("mean_99")]; tensor input_221 = sub(x = var_6667, y = mean_99)[name = string("input_221")]; tensor var_6732_axes_0 = const()[name = string("op_6732_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729344)))]; fp16 var_6720_to_fp16 = const()[name = string("op_6720_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6732_cast_fp16 = layer_norm(axes = var_6732_axes_0, epsilon = var_6720_to_fp16, gamma = model_model_layers_12_self_attn_q_norm_weight_to_fp16, x = input_221)[name = string("op_6732_cast_fp16")]; tensor mean_101_axes_0 = const()[name = string("mean_101_axes_0"), val = tensor([-1])]; bool mean_101_keep_dims_0 = const()[name = string("mean_101_keep_dims_0"), val = bool(true)]; tensor mean_101 = reduce_mean(axes = mean_101_axes_0, keep_dims = mean_101_keep_dims_0, x = var_6689)[name = string("mean_101")]; tensor input_223 = sub(x = var_6689, y = mean_101)[name = string("input_223")]; tensor var_6750_axes_0 = const()[name = string("op_6750_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729664)))]; fp16 var_6738_to_fp16 = const()[name = string("op_6738_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6750_cast_fp16 = layer_norm(axes = var_6750_axes_0, epsilon = var_6738_to_fp16, gamma = model_model_layers_12_self_attn_k_norm_weight_to_fp16, x = input_223)[name = string("op_6750_cast_fp16")]; tensor var_6753 = mul(x = var_6732_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6753")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_6732_cast_fp16)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_6732_cast_fp16)[name = string("x2_49")]; fp16 const_221_promoted = const()[name = string("const_221_promoted"), val = fp16(-0x1p+0)]; tensor var_6774 = mul(x = x2_49, y = const_221_promoted)[name = string("op_6774")]; int32 var_6776 = const()[name = string("op_6776"), val = int32(-1)]; bool var_6777_interleave_0 = const()[name = string("op_6777_interleave_0"), val = bool(false)]; tensor var_6777 = concat(axis = var_6776, interleave = var_6777_interleave_0, values = (var_6774, x1_49))[name = string("op_6777")]; tensor var_6778 = mul(x = var_6777, y = sin_1_cast_fp16)[name = string("op_6778")]; tensor query_states_49 = add(x = var_6753, y = var_6778)[name = string("query_states_49")]; tensor var_6781 = mul(x = var_6750_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6781")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_6750_cast_fp16)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_6750_cast_fp16)[name = string("x2_51")]; fp16 const_224_promoted = const()[name = string("const_224_promoted"), val = fp16(-0x1p+0)]; tensor var_6802 = mul(x = x2_51, y = const_224_promoted)[name = string("op_6802")]; int32 var_6804 = const()[name = string("op_6804"), val = int32(-1)]; bool var_6805_interleave_0 = const()[name = string("op_6805_interleave_0"), val = bool(false)]; tensor var_6805 = concat(axis = var_6804, interleave = var_6805_interleave_0, values = (var_6802, x1_51))[name = string("op_6805")]; tensor var_6806 = mul(x = var_6805, y = sin_1_cast_fp16)[name = string("op_6806")]; tensor key_states_49 = add(x = var_6781, y = var_6806)[name = string("key_states_49")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([12])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([13])]; int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_98")]; tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_148, concat_99_values1_0, var_1195, concat_99_values3_0))[name = string("concat_99")]; tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = key_states_49, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([48])]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([49])]; int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_102")]; tensor concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = tensor([0])]; tensor concat_103_values3_0 = const()[name = string("concat_103_values3_0"), val = tensor([0])]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (expand_dims_154, concat_103_values1_0, var_1195, concat_103_values3_0))[name = string("concat_103")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = var_6711, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; tensor var_6861_begin_0 = const()[name = string("op_6861_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_6861_end_0 = const()[name = string("op_6861_end_0"), val = tensor([13, 8, 1024, 128])]; tensor var_6861_end_mask_0 = const()[name = string("op_6861_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6861_cast_fp16 = slice_by_index(begin = var_6861_begin_0, end = var_6861_end_0, end_mask = var_6861_end_mask_0, x = coreml_update_state_61)[name = string("op_6861_cast_fp16")]; tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_6861_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; tensor var_6868_begin_0 = const()[name = string("op_6868_begin_0"), val = tensor([48, 0, 0, 0])]; tensor var_6868_end_0 = const()[name = string("op_6868_end_0"), val = tensor([49, 8, 1024, 128])]; tensor var_6868_end_mask_0 = const()[name = string("op_6868_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6868_cast_fp16 = slice_by_index(begin = var_6868_begin_0, end = var_6868_end_0, end_mask = var_6868_end_mask_0, x = coreml_update_state_61)[name = string("op_6868_cast_fp16")]; tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_6868_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; tensor x_247_axes_0 = const()[name = string("x_247_axes_0"), val = tensor([1])]; tensor x_247_cast_fp16 = expand_dims(axes = x_247_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_247_cast_fp16")]; tensor var_6905 = const()[name = string("op_6905"), val = tensor([1, 4, 1, 1])]; tensor x_249_cast_fp16 = tile(reps = var_6905, x = x_247_cast_fp16)[name = string("x_249_cast_fp16")]; tensor var_6917 = const()[name = string("op_6917"), val = tensor([1, -1, 1024, 128])]; tensor key_states_51_cast_fp16 = reshape(shape = var_6917, x = x_249_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor x_253_axes_0 = const()[name = string("x_253_axes_0"), val = tensor([1])]; tensor x_253_cast_fp16 = expand_dims(axes = x_253_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_253_cast_fp16")]; tensor var_6925 = const()[name = string("op_6925"), val = tensor([1, 4, 1, 1])]; tensor x_255_cast_fp16 = tile(reps = var_6925, x = x_253_cast_fp16)[name = string("x_255_cast_fp16")]; tensor var_6937 = const()[name = string("op_6937"), val = tensor([1, -1, 1024, 128])]; tensor value_states_75_cast_fp16 = reshape(shape = var_6937, x = x_255_cast_fp16)[name = string("value_states_75_cast_fp16")]; bool var_6952_transpose_x_1 = const()[name = string("op_6952_transpose_x_1"), val = bool(false)]; bool var_6952_transpose_y_1 = const()[name = string("op_6952_transpose_y_1"), val = bool(true)]; tensor var_6952 = matmul(transpose_x = var_6952_transpose_x_1, transpose_y = var_6952_transpose_y_1, x = query_states_49, y = key_states_51_cast_fp16)[name = string("op_6952")]; fp16 var_6953_to_fp16 = const()[name = string("op_6953_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_73_cast_fp16 = mul(x = var_6952, y = var_6953_to_fp16)[name = string("attn_weights_73_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; int32 var_6988 = const()[name = string("op_6988"), val = int32(-1)]; tensor attn_weights_77_cast_fp16 = softmax(axis = var_6988, x = attn_weights_75_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = attn_weights_77_cast_fp16, y = value_states_75_cast_fp16)[name = string("attn_output_121_cast_fp16")]; tensor var_6999_perm_0 = const()[name = string("op_6999_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7003 = const()[name = string("op_7003"), val = tensor([1, 1, 4096])]; tensor var_6999_cast_fp16 = transpose(perm = var_6999_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_34")]; tensor attn_output_125_cast_fp16 = reshape(shape = var_7003, x = var_6999_cast_fp16)[name = string("attn_output_125_cast_fp16")]; tensor var_7008 = const()[name = string("op_7008"), val = tensor([0, 2, 1])]; string var_7024_pad_type_0 = const()[name = string("op_7024_pad_type_0"), val = string("valid")]; int32 var_7024_groups_0 = const()[name = string("op_7024_groups_0"), val = int32(1)]; tensor var_7024_strides_0 = const()[name = string("op_7024_strides_0"), val = tensor([1])]; tensor var_7024_pad_0 = const()[name = string("op_7024_pad_0"), val = tensor([0, 0])]; tensor var_7024_dilations_0 = const()[name = string("op_7024_dilations_0"), val = tensor([1])]; tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917972928))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7009_cast_fp16 = transpose(perm = var_7008, x = attn_output_125_cast_fp16)[name = string("transpose_33")]; tensor var_7024_cast_fp16 = conv(dilations = var_7024_dilations_0, groups = var_7024_groups_0, pad = var_7024_pad_0, pad_type = var_7024_pad_type_0, strides = var_7024_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_7009_cast_fp16)[name = string("op_7024_cast_fp16")]; tensor var_7028 = const()[name = string("op_7028"), val = tensor([0, 2, 1])]; tensor attn_output_129_cast_fp16 = transpose(perm = var_7028, x = var_7024_cast_fp16)[name = string("transpose_32")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor mean_103_axes_0 = const()[name = string("mean_103_axes_0"), val = tensor([-1])]; bool mean_103_keep_dims_0 = const()[name = string("mean_103_keep_dims_0"), val = bool(true)]; tensor mean_103_cast_fp16 = reduce_mean(axes = mean_103_axes_0, keep_dims = mean_103_keep_dims_0, x = hidden_states_77_cast_fp16)[name = string("mean_103_cast_fp16")]; tensor input_227_cast_fp16 = sub(x = hidden_states_77_cast_fp16, y = mean_103_cast_fp16)[name = string("input_227_cast_fp16")]; tensor var_7047_axes_0 = const()[name = string("op_7047_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917983232)))]; fp16 var_7035_to_fp16 = const()[name = string("op_7035_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7047_cast_fp16 = layer_norm(axes = var_7047_axes_0, epsilon = var_7035_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_227_cast_fp16)[name = string("op_7047_cast_fp16")]; tensor var_7061 = const()[name = string("op_7061"), val = tensor([0, 2, 1])]; tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; tensor var_7062 = transpose(perm = var_7061, x = var_7047_cast_fp16)[name = string("transpose_31")]; tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_7062)[name = string("input_229")]; string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_229)[name = string("input_231")]; string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_229)[name = string("b_25")]; tensor c_25 = silu(x = input_231)[name = string("c_25")]; tensor input_233 = mul(x = c_25, y = b_25)[name = string("input_233")]; string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; tensor e_25 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_233)[name = string("e_25")]; tensor var_7084_axes_0 = const()[name = string("op_7084_axes_0"), val = tensor([2])]; tensor var_7084 = squeeze(axes = var_7084_axes_0, x = e_25)[name = string("op_7084")]; tensor var_7085 = const()[name = string("op_7085"), val = tensor([0, 2, 1])]; tensor var_7086 = transpose(perm = var_7085, x = var_7084)[name = string("transpose_30")]; tensor hidden_states_79_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = var_7086)[name = string("hidden_states_79_cast_fp16")]; tensor mean_105_axes_0 = const()[name = string("mean_105_axes_0"), val = tensor([-1])]; bool mean_105_keep_dims_0 = const()[name = string("mean_105_keep_dims_0"), val = bool(true)]; tensor mean_105_cast_fp16 = reduce_mean(axes = mean_105_axes_0, keep_dims = mean_105_keep_dims_0, x = hidden_states_79_cast_fp16)[name = string("mean_105_cast_fp16")]; tensor input_235_cast_fp16 = sub(x = hidden_states_79_cast_fp16, y = mean_105_cast_fp16)[name = string("input_235_cast_fp16")]; tensor var_7104_axes_0 = const()[name = string("op_7104_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917988416)))]; fp16 var_7092_to_fp16 = const()[name = string("op_7092_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7104_cast_fp16 = layer_norm(axes = var_7104_axes_0, epsilon = var_7092_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_235_cast_fp16)[name = string("op_7104_cast_fp16")]; tensor var_7110 = const()[name = string("op_7110"), val = tensor([0, 2, 1])]; tensor var_7113_axes_0 = const()[name = string("op_7113_axes_0"), val = tensor([2])]; tensor var_7111 = transpose(perm = var_7110, x = var_7104_cast_fp16)[name = string("transpose_29")]; tensor var_7113 = expand_dims(axes = var_7113_axes_0, x = var_7111)[name = string("op_7113")]; string var_7129_pad_type_0 = const()[name = string("op_7129_pad_type_0"), val = string("valid")]; tensor var_7129_strides_0 = const()[name = string("op_7129_strides_0"), val = tensor([1, 1])]; tensor var_7129_pad_0 = const()[name = string("op_7129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7129_dilations_0 = const()[name = string("op_7129_dilations_0"), val = tensor([1, 1])]; int32 var_7129_groups_0 = const()[name = string("op_7129_groups_0"), val = int32(1)]; tensor var_7129 = conv(dilations = var_7129_dilations_0, groups = var_7129_groups_0, pad = var_7129_pad_0, pad_type = var_7129_pad_type_0, strides = var_7129_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_7113)[name = string("op_7129")]; tensor var_7134 = const()[name = string("op_7134"), val = tensor([1, 32, 1, 128])]; tensor var_7135 = reshape(shape = var_7134, x = var_7129)[name = string("op_7135")]; string var_7151_pad_type_0 = const()[name = string("op_7151_pad_type_0"), val = string("valid")]; tensor var_7151_strides_0 = const()[name = string("op_7151_strides_0"), val = tensor([1, 1])]; tensor var_7151_pad_0 = const()[name = string("op_7151_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7151_dilations_0 = const()[name = string("op_7151_dilations_0"), val = tensor([1, 1])]; int32 var_7151_groups_0 = const()[name = string("op_7151_groups_0"), val = int32(1)]; tensor var_7151 = conv(dilations = var_7151_dilations_0, groups = var_7151_groups_0, pad = var_7151_pad_0, pad_type = var_7151_pad_type_0, strides = var_7151_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_7113)[name = string("op_7151")]; tensor var_7156 = const()[name = string("op_7156"), val = tensor([1, 8, 1, 128])]; tensor var_7157 = reshape(shape = var_7156, x = var_7151)[name = string("op_7157")]; string var_7173_pad_type_0 = const()[name = string("op_7173_pad_type_0"), val = string("valid")]; tensor var_7173_strides_0 = const()[name = string("op_7173_strides_0"), val = tensor([1, 1])]; tensor var_7173_pad_0 = const()[name = string("op_7173_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7173_dilations_0 = const()[name = string("op_7173_dilations_0"), val = tensor([1, 1])]; int32 var_7173_groups_0 = const()[name = string("op_7173_groups_0"), val = int32(1)]; tensor var_7173 = conv(dilations = var_7173_dilations_0, groups = var_7173_groups_0, pad = var_7173_pad_0, pad_type = var_7173_pad_type_0, strides = var_7173_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_7113)[name = string("op_7173")]; tensor var_7178 = const()[name = string("op_7178"), val = tensor([1, 8, 1, 128])]; tensor var_7179 = reshape(shape = var_7178, x = var_7173)[name = string("op_7179")]; tensor mean_107_axes_0 = const()[name = string("mean_107_axes_0"), val = tensor([-1])]; bool mean_107_keep_dims_0 = const()[name = string("mean_107_keep_dims_0"), val = bool(true)]; tensor mean_107 = reduce_mean(axes = mean_107_axes_0, keep_dims = mean_107_keep_dims_0, x = var_7135)[name = string("mean_107")]; tensor input_239 = sub(x = var_7135, y = mean_107)[name = string("input_239")]; tensor var_7200_axes_0 = const()[name = string("op_7200_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917993600)))]; fp16 var_7188_to_fp16 = const()[name = string("op_7188_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7200_cast_fp16 = layer_norm(axes = var_7200_axes_0, epsilon = var_7188_to_fp16, gamma = model_model_layers_13_self_attn_q_norm_weight_to_fp16, x = input_239)[name = string("op_7200_cast_fp16")]; tensor mean_109_axes_0 = const()[name = string("mean_109_axes_0"), val = tensor([-1])]; bool mean_109_keep_dims_0 = const()[name = string("mean_109_keep_dims_0"), val = bool(true)]; tensor mean_109 = reduce_mean(axes = mean_109_axes_0, keep_dims = mean_109_keep_dims_0, x = var_7157)[name = string("mean_109")]; tensor input_241 = sub(x = var_7157, y = mean_109)[name = string("input_241")]; tensor var_7218_axes_0 = const()[name = string("op_7218_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917993920)))]; fp16 var_7206_to_fp16 = const()[name = string("op_7206_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7218_cast_fp16 = layer_norm(axes = var_7218_axes_0, epsilon = var_7206_to_fp16, gamma = model_model_layers_13_self_attn_k_norm_weight_to_fp16, x = input_241)[name = string("op_7218_cast_fp16")]; tensor var_7221 = mul(x = var_7200_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7221")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_7200_cast_fp16)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_7200_cast_fp16)[name = string("x2_53")]; fp16 const_239_promoted = const()[name = string("const_239_promoted"), val = fp16(-0x1p+0)]; tensor var_7242 = mul(x = x2_53, y = const_239_promoted)[name = string("op_7242")]; int32 var_7244 = const()[name = string("op_7244"), val = int32(-1)]; bool var_7245_interleave_0 = const()[name = string("op_7245_interleave_0"), val = bool(false)]; tensor var_7245 = concat(axis = var_7244, interleave = var_7245_interleave_0, values = (var_7242, x1_53))[name = string("op_7245")]; tensor var_7246 = mul(x = var_7245, y = sin_1_cast_fp16)[name = string("op_7246")]; tensor query_states_53 = add(x = var_7221, y = var_7246)[name = string("query_states_53")]; tensor var_7249 = mul(x = var_7218_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7249")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_7218_cast_fp16)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_7218_cast_fp16)[name = string("x2_55")]; fp16 const_242_promoted = const()[name = string("const_242_promoted"), val = fp16(-0x1p+0)]; tensor var_7270 = mul(x = x2_55, y = const_242_promoted)[name = string("op_7270")]; int32 var_7272 = const()[name = string("op_7272"), val = int32(-1)]; bool var_7273_interleave_0 = const()[name = string("op_7273_interleave_0"), val = bool(false)]; tensor var_7273 = concat(axis = var_7272, interleave = var_7273_interleave_0, values = (var_7270, x1_55))[name = string("op_7273")]; tensor var_7274 = mul(x = var_7273, y = sin_1_cast_fp16)[name = string("op_7274")]; tensor key_states_53 = add(x = var_7249, y = var_7274)[name = string("key_states_53")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([13])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([14])]; int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_106")]; tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_160, concat_107_values1_0, var_1195, concat_107_values3_0))[name = string("concat_107")]; tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = key_states_53, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([49])]; tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([50])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_166, concat_111_values1_0, var_1195, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = var_7179, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; tensor var_7329_begin_0 = const()[name = string("op_7329_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_7329_end_0 = const()[name = string("op_7329_end_0"), val = tensor([14, 8, 1024, 128])]; tensor var_7329_end_mask_0 = const()[name = string("op_7329_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7329_cast_fp16 = slice_by_index(begin = var_7329_begin_0, end = var_7329_end_0, end_mask = var_7329_end_mask_0, x = coreml_update_state_63)[name = string("op_7329_cast_fp16")]; tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_7329_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; tensor var_7336_begin_0 = const()[name = string("op_7336_begin_0"), val = tensor([49, 0, 0, 0])]; tensor var_7336_end_0 = const()[name = string("op_7336_end_0"), val = tensor([50, 8, 1024, 128])]; tensor var_7336_end_mask_0 = const()[name = string("op_7336_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7336_cast_fp16 = slice_by_index(begin = var_7336_begin_0, end = var_7336_end_0, end_mask = var_7336_end_mask_0, x = coreml_update_state_63)[name = string("op_7336_cast_fp16")]; tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_7336_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; tensor x_267_axes_0 = const()[name = string("x_267_axes_0"), val = tensor([1])]; tensor x_267_cast_fp16 = expand_dims(axes = x_267_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_267_cast_fp16")]; tensor var_7373 = const()[name = string("op_7373"), val = tensor([1, 4, 1, 1])]; tensor x_269_cast_fp16 = tile(reps = var_7373, x = x_267_cast_fp16)[name = string("x_269_cast_fp16")]; tensor var_7385 = const()[name = string("op_7385"), val = tensor([1, -1, 1024, 128])]; tensor key_states_55_cast_fp16 = reshape(shape = var_7385, x = x_269_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor x_273_axes_0 = const()[name = string("x_273_axes_0"), val = tensor([1])]; tensor x_273_cast_fp16 = expand_dims(axes = x_273_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_273_cast_fp16")]; tensor var_7393 = const()[name = string("op_7393"), val = tensor([1, 4, 1, 1])]; tensor x_275_cast_fp16 = tile(reps = var_7393, x = x_273_cast_fp16)[name = string("x_275_cast_fp16")]; tensor var_7405 = const()[name = string("op_7405"), val = tensor([1, -1, 1024, 128])]; tensor value_states_81_cast_fp16 = reshape(shape = var_7405, x = x_275_cast_fp16)[name = string("value_states_81_cast_fp16")]; bool var_7420_transpose_x_1 = const()[name = string("op_7420_transpose_x_1"), val = bool(false)]; bool var_7420_transpose_y_1 = const()[name = string("op_7420_transpose_y_1"), val = bool(true)]; tensor var_7420 = matmul(transpose_x = var_7420_transpose_x_1, transpose_y = var_7420_transpose_y_1, x = query_states_53, y = key_states_55_cast_fp16)[name = string("op_7420")]; fp16 var_7421_to_fp16 = const()[name = string("op_7421_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_79_cast_fp16 = mul(x = var_7420, y = var_7421_to_fp16)[name = string("attn_weights_79_cast_fp16")]; tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = causal_mask)[name = string("attn_weights_81_cast_fp16")]; int32 var_7456 = const()[name = string("op_7456"), val = int32(-1)]; tensor attn_weights_83_cast_fp16 = softmax(axis = var_7456, x = attn_weights_81_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; bool attn_output_131_transpose_x_0 = const()[name = string("attn_output_131_transpose_x_0"), val = bool(false)]; bool attn_output_131_transpose_y_0 = const()[name = string("attn_output_131_transpose_y_0"), val = bool(false)]; tensor attn_output_131_cast_fp16 = matmul(transpose_x = attn_output_131_transpose_x_0, transpose_y = attn_output_131_transpose_y_0, x = attn_weights_83_cast_fp16, y = value_states_81_cast_fp16)[name = string("attn_output_131_cast_fp16")]; tensor var_7467_perm_0 = const()[name = string("op_7467_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7471 = const()[name = string("op_7471"), val = tensor([1, 1, 4096])]; tensor var_7467_cast_fp16 = transpose(perm = var_7467_perm_0, x = attn_output_131_cast_fp16)[name = string("transpose_28")]; tensor attn_output_135_cast_fp16 = reshape(shape = var_7471, x = var_7467_cast_fp16)[name = string("attn_output_135_cast_fp16")]; tensor var_7476 = const()[name = string("op_7476"), val = tensor([0, 2, 1])]; string var_7492_pad_type_0 = const()[name = string("op_7492_pad_type_0"), val = string("valid")]; int32 var_7492_groups_0 = const()[name = string("op_7492_groups_0"), val = int32(1)]; tensor var_7492_strides_0 = const()[name = string("op_7492_strides_0"), val = tensor([1])]; tensor var_7492_pad_0 = const()[name = string("op_7492_pad_0"), val = tensor([0, 0])]; tensor var_7492_dilations_0 = const()[name = string("op_7492_dilations_0"), val = tensor([1])]; tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917994240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923237184))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7477_cast_fp16 = transpose(perm = var_7476, x = attn_output_135_cast_fp16)[name = string("transpose_27")]; tensor var_7492_cast_fp16 = conv(dilations = var_7492_dilations_0, groups = var_7492_groups_0, pad = var_7492_pad_0, pad_type = var_7492_pad_type_0, strides = var_7492_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_7477_cast_fp16)[name = string("op_7492_cast_fp16")]; tensor var_7496 = const()[name = string("op_7496"), val = tensor([0, 2, 1])]; tensor attn_output_139_cast_fp16 = transpose(perm = var_7496, x = var_7492_cast_fp16)[name = string("transpose_26")]; tensor hidden_states_83_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor mean_111_axes_0 = const()[name = string("mean_111_axes_0"), val = tensor([-1])]; bool mean_111_keep_dims_0 = const()[name = string("mean_111_keep_dims_0"), val = bool(true)]; tensor mean_111_cast_fp16 = reduce_mean(axes = mean_111_axes_0, keep_dims = mean_111_keep_dims_0, x = hidden_states_83_cast_fp16)[name = string("mean_111_cast_fp16")]; tensor input_245_cast_fp16 = sub(x = hidden_states_83_cast_fp16, y = mean_111_cast_fp16)[name = string("input_245_cast_fp16")]; tensor var_7515_axes_0 = const()[name = string("op_7515_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923247488)))]; fp16 var_7503_to_fp16 = const()[name = string("op_7503_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7515_cast_fp16 = layer_norm(axes = var_7515_axes_0, epsilon = var_7503_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_245_cast_fp16)[name = string("op_7515_cast_fp16")]; tensor var_7529 = const()[name = string("op_7529"), val = tensor([0, 2, 1])]; tensor input_247_axes_0 = const()[name = string("input_247_axes_0"), val = tensor([2])]; tensor var_7530 = transpose(perm = var_7529, x = var_7515_cast_fp16)[name = string("transpose_25")]; tensor input_247 = expand_dims(axes = input_247_axes_0, x = var_7530)[name = string("input_247")]; string input_249_pad_type_0 = const()[name = string("input_249_pad_type_0"), val = string("valid")]; tensor input_249_strides_0 = const()[name = string("input_249_strides_0"), val = tensor([1, 1])]; tensor input_249_pad_0 = const()[name = string("input_249_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_249_dilations_0 = const()[name = string("input_249_dilations_0"), val = tensor([1, 1])]; int32 input_249_groups_0 = const()[name = string("input_249_groups_0"), val = int32(1)]; tensor input_249 = conv(dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_247)[name = string("input_249")]; string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_247)[name = string("b_27")]; tensor c_27 = silu(x = input_249)[name = string("c_27")]; tensor input_251 = mul(x = c_27, y = b_27)[name = string("input_251")]; string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; tensor e_27 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_251)[name = string("e_27")]; tensor var_7552_axes_0 = const()[name = string("op_7552_axes_0"), val = tensor([2])]; tensor var_7552 = squeeze(axes = var_7552_axes_0, x = e_27)[name = string("op_7552")]; tensor var_7553 = const()[name = string("op_7553"), val = tensor([0, 2, 1])]; tensor var_7554 = transpose(perm = var_7553, x = var_7552)[name = string("transpose_24")]; tensor hidden_states_85_cast_fp16 = add(x = hidden_states_83_cast_fp16, y = var_7554)[name = string("hidden_states_85_cast_fp16")]; tensor mean_113_axes_0 = const()[name = string("mean_113_axes_0"), val = tensor([-1])]; bool mean_113_keep_dims_0 = const()[name = string("mean_113_keep_dims_0"), val = bool(true)]; tensor mean_113_cast_fp16 = reduce_mean(axes = mean_113_axes_0, keep_dims = mean_113_keep_dims_0, x = hidden_states_85_cast_fp16)[name = string("mean_113_cast_fp16")]; tensor input_253_cast_fp16 = sub(x = hidden_states_85_cast_fp16, y = mean_113_cast_fp16)[name = string("input_253_cast_fp16")]; tensor var_7572_axes_0 = const()[name = string("op_7572_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923252672)))]; fp16 var_7560_to_fp16 = const()[name = string("op_7560_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7572_cast_fp16 = layer_norm(axes = var_7572_axes_0, epsilon = var_7560_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_253_cast_fp16)[name = string("op_7572_cast_fp16")]; tensor var_7578 = const()[name = string("op_7578"), val = tensor([0, 2, 1])]; tensor var_7581_axes_0 = const()[name = string("op_7581_axes_0"), val = tensor([2])]; tensor var_7579 = transpose(perm = var_7578, x = var_7572_cast_fp16)[name = string("transpose_23")]; tensor var_7581 = expand_dims(axes = var_7581_axes_0, x = var_7579)[name = string("op_7581")]; string var_7597_pad_type_0 = const()[name = string("op_7597_pad_type_0"), val = string("valid")]; tensor var_7597_strides_0 = const()[name = string("op_7597_strides_0"), val = tensor([1, 1])]; tensor var_7597_pad_0 = const()[name = string("op_7597_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7597_dilations_0 = const()[name = string("op_7597_dilations_0"), val = tensor([1, 1])]; int32 var_7597_groups_0 = const()[name = string("op_7597_groups_0"), val = int32(1)]; tensor var_7597 = conv(dilations = var_7597_dilations_0, groups = var_7597_groups_0, pad = var_7597_pad_0, pad_type = var_7597_pad_type_0, strides = var_7597_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_7581)[name = string("op_7597")]; tensor var_7602 = const()[name = string("op_7602"), val = tensor([1, 32, 1, 128])]; tensor var_7603 = reshape(shape = var_7602, x = var_7597)[name = string("op_7603")]; string var_7619_pad_type_0 = const()[name = string("op_7619_pad_type_0"), val = string("valid")]; tensor var_7619_strides_0 = const()[name = string("op_7619_strides_0"), val = tensor([1, 1])]; tensor var_7619_pad_0 = const()[name = string("op_7619_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7619_dilations_0 = const()[name = string("op_7619_dilations_0"), val = tensor([1, 1])]; int32 var_7619_groups_0 = const()[name = string("op_7619_groups_0"), val = int32(1)]; tensor var_7619 = conv(dilations = var_7619_dilations_0, groups = var_7619_groups_0, pad = var_7619_pad_0, pad_type = var_7619_pad_type_0, strides = var_7619_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_7581)[name = string("op_7619")]; tensor var_7624 = const()[name = string("op_7624"), val = tensor([1, 8, 1, 128])]; tensor var_7625 = reshape(shape = var_7624, x = var_7619)[name = string("op_7625")]; string var_7641_pad_type_0 = const()[name = string("op_7641_pad_type_0"), val = string("valid")]; tensor var_7641_strides_0 = const()[name = string("op_7641_strides_0"), val = tensor([1, 1])]; tensor var_7641_pad_0 = const()[name = string("op_7641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7641_dilations_0 = const()[name = string("op_7641_dilations_0"), val = tensor([1, 1])]; int32 var_7641_groups_0 = const()[name = string("op_7641_groups_0"), val = int32(1)]; tensor var_7641 = conv(dilations = var_7641_dilations_0, groups = var_7641_groups_0, pad = var_7641_pad_0, pad_type = var_7641_pad_type_0, strides = var_7641_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_7581)[name = string("op_7641")]; tensor var_7646 = const()[name = string("op_7646"), val = tensor([1, 8, 1, 128])]; tensor var_7647 = reshape(shape = var_7646, x = var_7641)[name = string("op_7647")]; tensor mean_115_axes_0 = const()[name = string("mean_115_axes_0"), val = tensor([-1])]; bool mean_115_keep_dims_0 = const()[name = string("mean_115_keep_dims_0"), val = bool(true)]; tensor mean_115 = reduce_mean(axes = mean_115_axes_0, keep_dims = mean_115_keep_dims_0, x = var_7603)[name = string("mean_115")]; tensor input_257 = sub(x = var_7603, y = mean_115)[name = string("input_257")]; tensor var_7668_axes_0 = const()[name = string("op_7668_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923257856)))]; fp16 var_7656_to_fp16 = const()[name = string("op_7656_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7668_cast_fp16 = layer_norm(axes = var_7668_axes_0, epsilon = var_7656_to_fp16, gamma = model_model_layers_14_self_attn_q_norm_weight_to_fp16, x = input_257)[name = string("op_7668_cast_fp16")]; tensor mean_117_axes_0 = const()[name = string("mean_117_axes_0"), val = tensor([-1])]; bool mean_117_keep_dims_0 = const()[name = string("mean_117_keep_dims_0"), val = bool(true)]; tensor mean_117 = reduce_mean(axes = mean_117_axes_0, keep_dims = mean_117_keep_dims_0, x = var_7625)[name = string("mean_117")]; tensor input_259 = sub(x = var_7625, y = mean_117)[name = string("input_259")]; tensor var_7686_axes_0 = const()[name = string("op_7686_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258176)))]; fp16 var_7674_to_fp16 = const()[name = string("op_7674_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7686_cast_fp16 = layer_norm(axes = var_7686_axes_0, epsilon = var_7674_to_fp16, gamma = model_model_layers_14_self_attn_k_norm_weight_to_fp16, x = input_259)[name = string("op_7686_cast_fp16")]; tensor var_7689 = mul(x = var_7668_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7689")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_7668_cast_fp16)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_7668_cast_fp16)[name = string("x2_57")]; fp16 const_257_promoted = const()[name = string("const_257_promoted"), val = fp16(-0x1p+0)]; tensor var_7710 = mul(x = x2_57, y = const_257_promoted)[name = string("op_7710")]; int32 var_7712 = const()[name = string("op_7712"), val = int32(-1)]; bool var_7713_interleave_0 = const()[name = string("op_7713_interleave_0"), val = bool(false)]; tensor var_7713 = concat(axis = var_7712, interleave = var_7713_interleave_0, values = (var_7710, x1_57))[name = string("op_7713")]; tensor var_7714 = mul(x = var_7713, y = sin_1_cast_fp16)[name = string("op_7714")]; tensor query_states_57 = add(x = var_7689, y = var_7714)[name = string("query_states_57")]; tensor var_7717 = mul(x = var_7686_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7717")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_7686_cast_fp16)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_7686_cast_fp16)[name = string("x2_59")]; fp16 const_260_promoted = const()[name = string("const_260_promoted"), val = fp16(-0x1p+0)]; tensor var_7738 = mul(x = x2_59, y = const_260_promoted)[name = string("op_7738")]; int32 var_7740 = const()[name = string("op_7740"), val = int32(-1)]; bool var_7741_interleave_0 = const()[name = string("op_7741_interleave_0"), val = bool(false)]; tensor var_7741 = concat(axis = var_7740, interleave = var_7741_interleave_0, values = (var_7738, x1_59))[name = string("op_7741")]; tensor var_7742 = mul(x = var_7741, y = sin_1_cast_fp16)[name = string("op_7742")]; tensor key_states_57 = add(x = var_7717, y = var_7742)[name = string("key_states_57")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([14])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([15])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_172, concat_115_values1_0, var_1195, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = key_states_57, x = coreml_update_state_63)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([50])]; tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([51])]; int32 concat_118_axis_0 = const()[name = string("concat_118_axis_0"), val = int32(0)]; bool concat_118_interleave_0 = const()[name = string("concat_118_interleave_0"), val = bool(false)]; tensor concat_118 = concat(axis = concat_118_axis_0, interleave = concat_118_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_118")]; tensor concat_119_values1_0 = const()[name = string("concat_119_values1_0"), val = tensor([0])]; tensor concat_119_values3_0 = const()[name = string("concat_119_values3_0"), val = tensor([0])]; int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (expand_dims_178, concat_119_values1_0, var_1195, concat_119_values3_0))[name = string("concat_119")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = var_7647, x = coreml_update_state_64)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; tensor var_7797_begin_0 = const()[name = string("op_7797_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_7797_end_0 = const()[name = string("op_7797_end_0"), val = tensor([15, 8, 1024, 128])]; tensor var_7797_end_mask_0 = const()[name = string("op_7797_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7797_cast_fp16 = slice_by_index(begin = var_7797_begin_0, end = var_7797_end_0, end_mask = var_7797_end_mask_0, x = coreml_update_state_65)[name = string("op_7797_cast_fp16")]; tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_7797_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; tensor var_7804_begin_0 = const()[name = string("op_7804_begin_0"), val = tensor([50, 0, 0, 0])]; tensor var_7804_end_0 = const()[name = string("op_7804_end_0"), val = tensor([51, 8, 1024, 128])]; tensor var_7804_end_mask_0 = const()[name = string("op_7804_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7804_cast_fp16 = slice_by_index(begin = var_7804_begin_0, end = var_7804_end_0, end_mask = var_7804_end_mask_0, x = coreml_update_state_65)[name = string("op_7804_cast_fp16")]; tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_7804_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; tensor x_287_axes_0 = const()[name = string("x_287_axes_0"), val = tensor([1])]; tensor x_287_cast_fp16 = expand_dims(axes = x_287_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_287_cast_fp16")]; tensor var_7841 = const()[name = string("op_7841"), val = tensor([1, 4, 1, 1])]; tensor x_289_cast_fp16 = tile(reps = var_7841, x = x_287_cast_fp16)[name = string("x_289_cast_fp16")]; tensor var_7853 = const()[name = string("op_7853"), val = tensor([1, -1, 1024, 128])]; tensor key_states_59_cast_fp16 = reshape(shape = var_7853, x = x_289_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor x_293_axes_0 = const()[name = string("x_293_axes_0"), val = tensor([1])]; tensor x_293_cast_fp16 = expand_dims(axes = x_293_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_293_cast_fp16")]; tensor var_7861 = const()[name = string("op_7861"), val = tensor([1, 4, 1, 1])]; tensor x_295_cast_fp16 = tile(reps = var_7861, x = x_293_cast_fp16)[name = string("x_295_cast_fp16")]; tensor var_7873 = const()[name = string("op_7873"), val = tensor([1, -1, 1024, 128])]; tensor value_states_87_cast_fp16 = reshape(shape = var_7873, x = x_295_cast_fp16)[name = string("value_states_87_cast_fp16")]; bool var_7888_transpose_x_1 = const()[name = string("op_7888_transpose_x_1"), val = bool(false)]; bool var_7888_transpose_y_1 = const()[name = string("op_7888_transpose_y_1"), val = bool(true)]; tensor var_7888 = matmul(transpose_x = var_7888_transpose_x_1, transpose_y = var_7888_transpose_y_1, x = query_states_57, y = key_states_59_cast_fp16)[name = string("op_7888")]; fp16 var_7889_to_fp16 = const()[name = string("op_7889_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_85_cast_fp16 = mul(x = var_7888, y = var_7889_to_fp16)[name = string("attn_weights_85_cast_fp16")]; tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; int32 var_7924 = const()[name = string("op_7924"), val = int32(-1)]; tensor attn_weights_89_cast_fp16 = softmax(axis = var_7924, x = attn_weights_87_cast_fp16)[name = string("attn_weights_89_cast_fp16")]; bool attn_output_141_transpose_x_0 = const()[name = string("attn_output_141_transpose_x_0"), val = bool(false)]; bool attn_output_141_transpose_y_0 = const()[name = string("attn_output_141_transpose_y_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = matmul(transpose_x = attn_output_141_transpose_x_0, transpose_y = attn_output_141_transpose_y_0, x = attn_weights_89_cast_fp16, y = value_states_87_cast_fp16)[name = string("attn_output_141_cast_fp16")]; tensor var_7935_perm_0 = const()[name = string("op_7935_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7939 = const()[name = string("op_7939"), val = tensor([1, 1, 4096])]; tensor var_7935_cast_fp16 = transpose(perm = var_7935_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_22")]; tensor attn_output_145_cast_fp16 = reshape(shape = var_7939, x = var_7935_cast_fp16)[name = string("attn_output_145_cast_fp16")]; tensor var_7944 = const()[name = string("op_7944"), val = tensor([0, 2, 1])]; string var_7960_pad_type_0 = const()[name = string("op_7960_pad_type_0"), val = string("valid")]; int32 var_7960_groups_0 = const()[name = string("op_7960_groups_0"), val = int32(1)]; tensor var_7960_strides_0 = const()[name = string("op_7960_strides_0"), val = tensor([1])]; tensor var_7960_pad_0 = const()[name = string("op_7960_pad_0"), val = tensor([0, 0])]; tensor var_7960_dilations_0 = const()[name = string("op_7960_dilations_0"), val = tensor([1])]; tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928501440))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7945_cast_fp16 = transpose(perm = var_7944, x = attn_output_145_cast_fp16)[name = string("transpose_21")]; tensor var_7960_cast_fp16 = conv(dilations = var_7960_dilations_0, groups = var_7960_groups_0, pad = var_7960_pad_0, pad_type = var_7960_pad_type_0, strides = var_7960_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_7945_cast_fp16)[name = string("op_7960_cast_fp16")]; tensor var_7964 = const()[name = string("op_7964"), val = tensor([0, 2, 1])]; tensor attn_output_149_cast_fp16 = transpose(perm = var_7964, x = var_7960_cast_fp16)[name = string("transpose_20")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor mean_119_axes_0 = const()[name = string("mean_119_axes_0"), val = tensor([-1])]; bool mean_119_keep_dims_0 = const()[name = string("mean_119_keep_dims_0"), val = bool(true)]; tensor mean_119_cast_fp16 = reduce_mean(axes = mean_119_axes_0, keep_dims = mean_119_keep_dims_0, x = hidden_states_89_cast_fp16)[name = string("mean_119_cast_fp16")]; tensor input_263_cast_fp16 = sub(x = hidden_states_89_cast_fp16, y = mean_119_cast_fp16)[name = string("input_263_cast_fp16")]; tensor var_7983_axes_0 = const()[name = string("op_7983_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928511744)))]; fp16 var_7971_to_fp16 = const()[name = string("op_7971_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7983_cast_fp16 = layer_norm(axes = var_7983_axes_0, epsilon = var_7971_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_263_cast_fp16)[name = string("op_7983_cast_fp16")]; tensor var_7997 = const()[name = string("op_7997"), val = tensor([0, 2, 1])]; tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; tensor var_7998 = transpose(perm = var_7997, x = var_7983_cast_fp16)[name = string("transpose_19")]; tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_7998)[name = string("input_265")]; string input_267_pad_type_0 = const()[name = string("input_267_pad_type_0"), val = string("valid")]; tensor input_267_strides_0 = const()[name = string("input_267_strides_0"), val = tensor([1, 1])]; tensor input_267_pad_0 = const()[name = string("input_267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_267_dilations_0 = const()[name = string("input_267_dilations_0"), val = tensor([1, 1])]; int32 input_267_groups_0 = const()[name = string("input_267_groups_0"), val = int32(1)]; tensor input_267 = conv(dilations = input_267_dilations_0, groups = input_267_groups_0, pad = input_267_pad_0, pad_type = input_267_pad_type_0, strides = input_267_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_265)[name = string("input_267")]; string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_265)[name = string("b_29")]; tensor c_29 = silu(x = input_267)[name = string("c_29")]; tensor input_269 = mul(x = c_29, y = b_29)[name = string("input_269")]; string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; tensor e_29 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_269)[name = string("e_29")]; tensor var_8020_axes_0 = const()[name = string("op_8020_axes_0"), val = tensor([2])]; tensor var_8020 = squeeze(axes = var_8020_axes_0, x = e_29)[name = string("op_8020")]; tensor var_8021 = const()[name = string("op_8021"), val = tensor([0, 2, 1])]; tensor var_8022 = transpose(perm = var_8021, x = var_8020)[name = string("transpose_18")]; tensor hidden_states_91_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = var_8022)[name = string("hidden_states_91_cast_fp16")]; tensor mean_121_axes_0 = const()[name = string("mean_121_axes_0"), val = tensor([-1])]; bool mean_121_keep_dims_0 = const()[name = string("mean_121_keep_dims_0"), val = bool(true)]; tensor mean_121_cast_fp16 = reduce_mean(axes = mean_121_axes_0, keep_dims = mean_121_keep_dims_0, x = hidden_states_91_cast_fp16)[name = string("mean_121_cast_fp16")]; tensor input_271_cast_fp16 = sub(x = hidden_states_91_cast_fp16, y = mean_121_cast_fp16)[name = string("input_271_cast_fp16")]; tensor var_8040_axes_0 = const()[name = string("op_8040_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928516928)))]; fp16 var_8028_to_fp16 = const()[name = string("op_8028_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8040_cast_fp16 = layer_norm(axes = var_8040_axes_0, epsilon = var_8028_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_271_cast_fp16)[name = string("op_8040_cast_fp16")]; tensor var_8046 = const()[name = string("op_8046"), val = tensor([0, 2, 1])]; tensor var_8049_axes_0 = const()[name = string("op_8049_axes_0"), val = tensor([2])]; tensor var_8047 = transpose(perm = var_8046, x = var_8040_cast_fp16)[name = string("transpose_17")]; tensor var_8049 = expand_dims(axes = var_8049_axes_0, x = var_8047)[name = string("op_8049")]; string var_8065_pad_type_0 = const()[name = string("op_8065_pad_type_0"), val = string("valid")]; tensor var_8065_strides_0 = const()[name = string("op_8065_strides_0"), val = tensor([1, 1])]; tensor var_8065_pad_0 = const()[name = string("op_8065_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8065_dilations_0 = const()[name = string("op_8065_dilations_0"), val = tensor([1, 1])]; int32 var_8065_groups_0 = const()[name = string("op_8065_groups_0"), val = int32(1)]; tensor var_8065 = conv(dilations = var_8065_dilations_0, groups = var_8065_groups_0, pad = var_8065_pad_0, pad_type = var_8065_pad_type_0, strides = var_8065_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_8049)[name = string("op_8065")]; tensor var_8070 = const()[name = string("op_8070"), val = tensor([1, 32, 1, 128])]; tensor var_8071 = reshape(shape = var_8070, x = var_8065)[name = string("op_8071")]; string var_8087_pad_type_0 = const()[name = string("op_8087_pad_type_0"), val = string("valid")]; tensor var_8087_strides_0 = const()[name = string("op_8087_strides_0"), val = tensor([1, 1])]; tensor var_8087_pad_0 = const()[name = string("op_8087_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8087_dilations_0 = const()[name = string("op_8087_dilations_0"), val = tensor([1, 1])]; int32 var_8087_groups_0 = const()[name = string("op_8087_groups_0"), val = int32(1)]; tensor var_8087 = conv(dilations = var_8087_dilations_0, groups = var_8087_groups_0, pad = var_8087_pad_0, pad_type = var_8087_pad_type_0, strides = var_8087_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_8049)[name = string("op_8087")]; tensor var_8092 = const()[name = string("op_8092"), val = tensor([1, 8, 1, 128])]; tensor var_8093 = reshape(shape = var_8092, x = var_8087)[name = string("op_8093")]; string var_8109_pad_type_0 = const()[name = string("op_8109_pad_type_0"), val = string("valid")]; tensor var_8109_strides_0 = const()[name = string("op_8109_strides_0"), val = tensor([1, 1])]; tensor var_8109_pad_0 = const()[name = string("op_8109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8109_dilations_0 = const()[name = string("op_8109_dilations_0"), val = tensor([1, 1])]; int32 var_8109_groups_0 = const()[name = string("op_8109_groups_0"), val = int32(1)]; tensor var_8109 = conv(dilations = var_8109_dilations_0, groups = var_8109_groups_0, pad = var_8109_pad_0, pad_type = var_8109_pad_type_0, strides = var_8109_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_8049)[name = string("op_8109")]; tensor var_8114 = const()[name = string("op_8114"), val = tensor([1, 8, 1, 128])]; tensor var_8115 = reshape(shape = var_8114, x = var_8109)[name = string("op_8115")]; tensor mean_123_axes_0 = const()[name = string("mean_123_axes_0"), val = tensor([-1])]; bool mean_123_keep_dims_0 = const()[name = string("mean_123_keep_dims_0"), val = bool(true)]; tensor mean_123 = reduce_mean(axes = mean_123_axes_0, keep_dims = mean_123_keep_dims_0, x = var_8071)[name = string("mean_123")]; tensor input_275 = sub(x = var_8071, y = mean_123)[name = string("input_275")]; tensor var_8136_axes_0 = const()[name = string("op_8136_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522112)))]; fp16 var_8124_to_fp16 = const()[name = string("op_8124_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8136_cast_fp16 = layer_norm(axes = var_8136_axes_0, epsilon = var_8124_to_fp16, gamma = model_model_layers_15_self_attn_q_norm_weight_to_fp16, x = input_275)[name = string("op_8136_cast_fp16")]; tensor mean_125_axes_0 = const()[name = string("mean_125_axes_0"), val = tensor([-1])]; bool mean_125_keep_dims_0 = const()[name = string("mean_125_keep_dims_0"), val = bool(true)]; tensor mean_125 = reduce_mean(axes = mean_125_axes_0, keep_dims = mean_125_keep_dims_0, x = var_8093)[name = string("mean_125")]; tensor input_277 = sub(x = var_8093, y = mean_125)[name = string("input_277")]; tensor var_8154_axes_0 = const()[name = string("op_8154_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522432)))]; fp16 var_8142_to_fp16 = const()[name = string("op_8142_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8154_cast_fp16 = layer_norm(axes = var_8154_axes_0, epsilon = var_8142_to_fp16, gamma = model_model_layers_15_self_attn_k_norm_weight_to_fp16, x = input_277)[name = string("op_8154_cast_fp16")]; tensor var_8157 = mul(x = var_8136_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8157")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_8136_cast_fp16)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_8136_cast_fp16)[name = string("x2_61")]; fp16 const_275_promoted = const()[name = string("const_275_promoted"), val = fp16(-0x1p+0)]; tensor var_8178 = mul(x = x2_61, y = const_275_promoted)[name = string("op_8178")]; int32 var_8180 = const()[name = string("op_8180"), val = int32(-1)]; bool var_8181_interleave_0 = const()[name = string("op_8181_interleave_0"), val = bool(false)]; tensor var_8181 = concat(axis = var_8180, interleave = var_8181_interleave_0, values = (var_8178, x1_61))[name = string("op_8181")]; tensor var_8182 = mul(x = var_8181, y = sin_1_cast_fp16)[name = string("op_8182")]; tensor query_states_61 = add(x = var_8157, y = var_8182)[name = string("query_states_61")]; tensor var_8185 = mul(x = var_8154_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8185")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = var_8154_cast_fp16)[name = string("x1_63")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = var_8154_cast_fp16)[name = string("x2_63")]; fp16 const_278_promoted = const()[name = string("const_278_promoted"), val = fp16(-0x1p+0)]; tensor var_8206 = mul(x = x2_63, y = const_278_promoted)[name = string("op_8206")]; int32 var_8208 = const()[name = string("op_8208"), val = int32(-1)]; bool var_8209_interleave_0 = const()[name = string("op_8209_interleave_0"), val = bool(false)]; tensor var_8209 = concat(axis = var_8208, interleave = var_8209_interleave_0, values = (var_8206, x1_63))[name = string("op_8209")]; tensor var_8210 = mul(x = var_8209, y = sin_1_cast_fp16)[name = string("op_8210")]; tensor key_states_61 = add(x = var_8185, y = var_8210)[name = string("key_states_61")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([15])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([16])]; int32 concat_122_axis_0 = const()[name = string("concat_122_axis_0"), val = int32(0)]; bool concat_122_interleave_0 = const()[name = string("concat_122_interleave_0"), val = bool(false)]; tensor concat_122 = concat(axis = concat_122_axis_0, interleave = concat_122_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_122")]; tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_184, concat_123_values1_0, var_1195, concat_123_values3_0))[name = string("concat_123")]; tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_122, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_123, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = key_states_61, x = coreml_update_state_65)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([51])]; tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([52])]; int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_126")]; tensor concat_127_values1_0 = const()[name = string("concat_127_values1_0"), val = tensor([0])]; tensor concat_127_values3_0 = const()[name = string("concat_127_values3_0"), val = tensor([0])]; int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (expand_dims_190, concat_127_values1_0, var_1195, concat_127_values3_0))[name = string("concat_127")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_126, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_127, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = var_8115, x = coreml_update_state_66)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; tensor var_8265_begin_0 = const()[name = string("op_8265_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_8265_end_0 = const()[name = string("op_8265_end_0"), val = tensor([16, 8, 1024, 128])]; tensor var_8265_end_mask_0 = const()[name = string("op_8265_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8265_cast_fp16 = slice_by_index(begin = var_8265_begin_0, end = var_8265_end_0, end_mask = var_8265_end_mask_0, x = coreml_update_state_67)[name = string("op_8265_cast_fp16")]; tensor K_layer_cache_31_axes_0 = const()[name = string("K_layer_cache_31_axes_0"), val = tensor([0])]; tensor K_layer_cache_31_cast_fp16 = squeeze(axes = K_layer_cache_31_axes_0, x = var_8265_cast_fp16)[name = string("K_layer_cache_31_cast_fp16")]; tensor var_8272_begin_0 = const()[name = string("op_8272_begin_0"), val = tensor([51, 0, 0, 0])]; tensor var_8272_end_0 = const()[name = string("op_8272_end_0"), val = tensor([52, 8, 1024, 128])]; tensor var_8272_end_mask_0 = const()[name = string("op_8272_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8272_cast_fp16 = slice_by_index(begin = var_8272_begin_0, end = var_8272_end_0, end_mask = var_8272_end_mask_0, x = coreml_update_state_67)[name = string("op_8272_cast_fp16")]; tensor V_layer_cache_31_axes_0 = const()[name = string("V_layer_cache_31_axes_0"), val = tensor([0])]; tensor V_layer_cache_31_cast_fp16 = squeeze(axes = V_layer_cache_31_axes_0, x = var_8272_cast_fp16)[name = string("V_layer_cache_31_cast_fp16")]; tensor x_307_axes_0 = const()[name = string("x_307_axes_0"), val = tensor([1])]; tensor x_307_cast_fp16 = expand_dims(axes = x_307_axes_0, x = K_layer_cache_31_cast_fp16)[name = string("x_307_cast_fp16")]; tensor var_8309 = const()[name = string("op_8309"), val = tensor([1, 4, 1, 1])]; tensor x_309_cast_fp16 = tile(reps = var_8309, x = x_307_cast_fp16)[name = string("x_309_cast_fp16")]; tensor var_8321 = const()[name = string("op_8321"), val = tensor([1, -1, 1024, 128])]; tensor key_states_63_cast_fp16 = reshape(shape = var_8321, x = x_309_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor x_313_axes_0 = const()[name = string("x_313_axes_0"), val = tensor([1])]; tensor x_313_cast_fp16 = expand_dims(axes = x_313_axes_0, x = V_layer_cache_31_cast_fp16)[name = string("x_313_cast_fp16")]; tensor var_8329 = const()[name = string("op_8329"), val = tensor([1, 4, 1, 1])]; tensor x_315_cast_fp16 = tile(reps = var_8329, x = x_313_cast_fp16)[name = string("x_315_cast_fp16")]; tensor var_8341 = const()[name = string("op_8341"), val = tensor([1, -1, 1024, 128])]; tensor value_states_93_cast_fp16 = reshape(shape = var_8341, x = x_315_cast_fp16)[name = string("value_states_93_cast_fp16")]; bool var_8356_transpose_x_1 = const()[name = string("op_8356_transpose_x_1"), val = bool(false)]; bool var_8356_transpose_y_1 = const()[name = string("op_8356_transpose_y_1"), val = bool(true)]; tensor var_8356 = matmul(transpose_x = var_8356_transpose_x_1, transpose_y = var_8356_transpose_y_1, x = query_states_61, y = key_states_63_cast_fp16)[name = string("op_8356")]; fp16 var_8357_to_fp16 = const()[name = string("op_8357_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_91_cast_fp16 = mul(x = var_8356, y = var_8357_to_fp16)[name = string("attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = causal_mask)[name = string("attn_weights_93_cast_fp16")]; int32 var_8392 = const()[name = string("op_8392"), val = int32(-1)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_8392, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_151_transpose_x_0 = const()[name = string("attn_output_151_transpose_x_0"), val = bool(false)]; bool attn_output_151_transpose_y_0 = const()[name = string("attn_output_151_transpose_y_0"), val = bool(false)]; tensor attn_output_151_cast_fp16 = matmul(transpose_x = attn_output_151_transpose_x_0, transpose_y = attn_output_151_transpose_y_0, x = attn_weights_95_cast_fp16, y = value_states_93_cast_fp16)[name = string("attn_output_151_cast_fp16")]; tensor var_8403_perm_0 = const()[name = string("op_8403_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8407 = const()[name = string("op_8407"), val = tensor([1, 1, 4096])]; tensor var_8403_cast_fp16 = transpose(perm = var_8403_perm_0, x = attn_output_151_cast_fp16)[name = string("transpose_16")]; tensor attn_output_155_cast_fp16 = reshape(shape = var_8407, x = var_8403_cast_fp16)[name = string("attn_output_155_cast_fp16")]; tensor var_8412 = const()[name = string("op_8412"), val = tensor([0, 2, 1])]; string var_8428_pad_type_0 = const()[name = string("op_8428_pad_type_0"), val = string("valid")]; int32 var_8428_groups_0 = const()[name = string("op_8428_groups_0"), val = int32(1)]; tensor var_8428_strides_0 = const()[name = string("op_8428_strides_0"), val = tensor([1])]; tensor var_8428_pad_0 = const()[name = string("op_8428_pad_0"), val = tensor([0, 0])]; tensor var_8428_dilations_0 = const()[name = string("op_8428_dilations_0"), val = tensor([1])]; tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933765696))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8413_cast_fp16 = transpose(perm = var_8412, x = attn_output_155_cast_fp16)[name = string("transpose_15")]; tensor var_8428_cast_fp16 = conv(dilations = var_8428_dilations_0, groups = var_8428_groups_0, pad = var_8428_pad_0, pad_type = var_8428_pad_type_0, strides = var_8428_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_8413_cast_fp16)[name = string("op_8428_cast_fp16")]; tensor var_8432 = const()[name = string("op_8432"), val = tensor([0, 2, 1])]; tensor attn_output_159_cast_fp16 = transpose(perm = var_8432, x = var_8428_cast_fp16)[name = string("transpose_14")]; tensor hidden_states_95_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor mean_127_axes_0 = const()[name = string("mean_127_axes_0"), val = tensor([-1])]; bool mean_127_keep_dims_0 = const()[name = string("mean_127_keep_dims_0"), val = bool(true)]; tensor mean_127_cast_fp16 = reduce_mean(axes = mean_127_axes_0, keep_dims = mean_127_keep_dims_0, x = hidden_states_95_cast_fp16)[name = string("mean_127_cast_fp16")]; tensor input_281_cast_fp16 = sub(x = hidden_states_95_cast_fp16, y = mean_127_cast_fp16)[name = string("input_281_cast_fp16")]; tensor var_8451_axes_0 = const()[name = string("op_8451_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933776000)))]; fp16 var_8439_to_fp16 = const()[name = string("op_8439_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8451_cast_fp16 = layer_norm(axes = var_8451_axes_0, epsilon = var_8439_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_281_cast_fp16)[name = string("op_8451_cast_fp16")]; tensor var_8465 = const()[name = string("op_8465"), val = tensor([0, 2, 1])]; tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; tensor var_8466 = transpose(perm = var_8465, x = var_8451_cast_fp16)[name = string("transpose_13")]; tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_8466)[name = string("input_283")]; string input_285_pad_type_0 = const()[name = string("input_285_pad_type_0"), val = string("valid")]; tensor input_285_strides_0 = const()[name = string("input_285_strides_0"), val = tensor([1, 1])]; tensor input_285_pad_0 = const()[name = string("input_285_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_285_dilations_0 = const()[name = string("input_285_dilations_0"), val = tensor([1, 1])]; int32 input_285_groups_0 = const()[name = string("input_285_groups_0"), val = int32(1)]; tensor input_285 = conv(dilations = input_285_dilations_0, groups = input_285_groups_0, pad = input_285_pad_0, pad_type = input_285_pad_type_0, strides = input_285_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_283)[name = string("input_285")]; string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; tensor b_31 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_283)[name = string("b_31")]; tensor c_31 = silu(x = input_285)[name = string("c_31")]; tensor input_287 = mul(x = c_31, y = b_31)[name = string("input_287")]; string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; tensor e_31 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_287)[name = string("e_31")]; tensor var_8488_axes_0 = const()[name = string("op_8488_axes_0"), val = tensor([2])]; tensor var_8488 = squeeze(axes = var_8488_axes_0, x = e_31)[name = string("op_8488")]; tensor var_8489 = const()[name = string("op_8489"), val = tensor([0, 2, 1])]; tensor var_8490 = transpose(perm = var_8489, x = var_8488)[name = string("transpose_12")]; tensor hidden_states_97_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = var_8490)[name = string("hidden_states_97_cast_fp16")]; tensor mean_129_axes_0 = const()[name = string("mean_129_axes_0"), val = tensor([-1])]; bool mean_129_keep_dims_0 = const()[name = string("mean_129_keep_dims_0"), val = bool(true)]; tensor mean_129_cast_fp16 = reduce_mean(axes = mean_129_axes_0, keep_dims = mean_129_keep_dims_0, x = hidden_states_97_cast_fp16)[name = string("mean_129_cast_fp16")]; tensor input_289_cast_fp16 = sub(x = hidden_states_97_cast_fp16, y = mean_129_cast_fp16)[name = string("input_289_cast_fp16")]; tensor var_8508_axes_0 = const()[name = string("op_8508_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933781184)))]; fp16 var_8496_to_fp16 = const()[name = string("op_8496_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8508_cast_fp16 = layer_norm(axes = var_8508_axes_0, epsilon = var_8496_to_fp16, gamma = model_model_layers_16_input_layernorm_weight_to_fp16, x = input_289_cast_fp16)[name = string("op_8508_cast_fp16")]; tensor var_8514 = const()[name = string("op_8514"), val = tensor([0, 2, 1])]; tensor var_8517_axes_0 = const()[name = string("op_8517_axes_0"), val = tensor([2])]; tensor var_8515 = transpose(perm = var_8514, x = var_8508_cast_fp16)[name = string("transpose_11")]; tensor var_8517 = expand_dims(axes = var_8517_axes_0, x = var_8515)[name = string("op_8517")]; string var_8533_pad_type_0 = const()[name = string("op_8533_pad_type_0"), val = string("valid")]; tensor var_8533_strides_0 = const()[name = string("op_8533_strides_0"), val = tensor([1, 1])]; tensor var_8533_pad_0 = const()[name = string("op_8533_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8533_dilations_0 = const()[name = string("op_8533_dilations_0"), val = tensor([1, 1])]; int32 var_8533_groups_0 = const()[name = string("op_8533_groups_0"), val = int32(1)]; tensor var_8533 = conv(dilations = var_8533_dilations_0, groups = var_8533_groups_0, pad = var_8533_pad_0, pad_type = var_8533_pad_type_0, strides = var_8533_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_8517)[name = string("op_8533")]; tensor var_8538 = const()[name = string("op_8538"), val = tensor([1, 32, 1, 128])]; tensor var_8539 = reshape(shape = var_8538, x = var_8533)[name = string("op_8539")]; string var_8555_pad_type_0 = const()[name = string("op_8555_pad_type_0"), val = string("valid")]; tensor var_8555_strides_0 = const()[name = string("op_8555_strides_0"), val = tensor([1, 1])]; tensor var_8555_pad_0 = const()[name = string("op_8555_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8555_dilations_0 = const()[name = string("op_8555_dilations_0"), val = tensor([1, 1])]; int32 var_8555_groups_0 = const()[name = string("op_8555_groups_0"), val = int32(1)]; tensor var_8555 = conv(dilations = var_8555_dilations_0, groups = var_8555_groups_0, pad = var_8555_pad_0, pad_type = var_8555_pad_type_0, strides = var_8555_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_8517)[name = string("op_8555")]; tensor var_8560 = const()[name = string("op_8560"), val = tensor([1, 8, 1, 128])]; tensor var_8561 = reshape(shape = var_8560, x = var_8555)[name = string("op_8561")]; string var_8577_pad_type_0 = const()[name = string("op_8577_pad_type_0"), val = string("valid")]; tensor var_8577_strides_0 = const()[name = string("op_8577_strides_0"), val = tensor([1, 1])]; tensor var_8577_pad_0 = const()[name = string("op_8577_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8577_dilations_0 = const()[name = string("op_8577_dilations_0"), val = tensor([1, 1])]; int32 var_8577_groups_0 = const()[name = string("op_8577_groups_0"), val = int32(1)]; tensor var_8577 = conv(dilations = var_8577_dilations_0, groups = var_8577_groups_0, pad = var_8577_pad_0, pad_type = var_8577_pad_type_0, strides = var_8577_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_8517)[name = string("op_8577")]; tensor var_8582 = const()[name = string("op_8582"), val = tensor([1, 8, 1, 128])]; tensor var_8583 = reshape(shape = var_8582, x = var_8577)[name = string("op_8583")]; tensor mean_131_axes_0 = const()[name = string("mean_131_axes_0"), val = tensor([-1])]; bool mean_131_keep_dims_0 = const()[name = string("mean_131_keep_dims_0"), val = bool(true)]; tensor mean_131 = reduce_mean(axes = mean_131_axes_0, keep_dims = mean_131_keep_dims_0, x = var_8539)[name = string("mean_131")]; tensor input_293 = sub(x = var_8539, y = mean_131)[name = string("input_293")]; tensor var_8604_axes_0 = const()[name = string("op_8604_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933786368)))]; fp16 var_8592_to_fp16 = const()[name = string("op_8592_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8604_cast_fp16 = layer_norm(axes = var_8604_axes_0, epsilon = var_8592_to_fp16, gamma = model_model_layers_16_self_attn_q_norm_weight_to_fp16, x = input_293)[name = string("op_8604_cast_fp16")]; tensor mean_133_axes_0 = const()[name = string("mean_133_axes_0"), val = tensor([-1])]; bool mean_133_keep_dims_0 = const()[name = string("mean_133_keep_dims_0"), val = bool(true)]; tensor mean_133 = reduce_mean(axes = mean_133_axes_0, keep_dims = mean_133_keep_dims_0, x = var_8561)[name = string("mean_133")]; tensor input_295 = sub(x = var_8561, y = mean_133)[name = string("input_295")]; tensor var_8622_axes_0 = const()[name = string("op_8622_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933786688)))]; fp16 var_8610_to_fp16 = const()[name = string("op_8610_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8622_cast_fp16 = layer_norm(axes = var_8622_axes_0, epsilon = var_8610_to_fp16, gamma = model_model_layers_16_self_attn_k_norm_weight_to_fp16, x = input_295)[name = string("op_8622_cast_fp16")]; tensor var_8625 = mul(x = var_8604_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8625")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = var_8604_cast_fp16)[name = string("x1_65")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = var_8604_cast_fp16)[name = string("x2_65")]; fp16 const_293_promoted = const()[name = string("const_293_promoted"), val = fp16(-0x1p+0)]; tensor var_8646 = mul(x = x2_65, y = const_293_promoted)[name = string("op_8646")]; int32 var_8648 = const()[name = string("op_8648"), val = int32(-1)]; bool var_8649_interleave_0 = const()[name = string("op_8649_interleave_0"), val = bool(false)]; tensor var_8649 = concat(axis = var_8648, interleave = var_8649_interleave_0, values = (var_8646, x1_65))[name = string("op_8649")]; tensor var_8650 = mul(x = var_8649, y = sin_1_cast_fp16)[name = string("op_8650")]; tensor query_states_65 = add(x = var_8625, y = var_8650)[name = string("query_states_65")]; tensor var_8653 = mul(x = var_8622_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8653")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = var_8622_cast_fp16)[name = string("x1_67")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = var_8622_cast_fp16)[name = string("x2_67")]; fp16 const_296_promoted = const()[name = string("const_296_promoted"), val = fp16(-0x1p+0)]; tensor var_8674 = mul(x = x2_67, y = const_296_promoted)[name = string("op_8674")]; int32 var_8676 = const()[name = string("op_8676"), val = int32(-1)]; bool var_8677_interleave_0 = const()[name = string("op_8677_interleave_0"), val = bool(false)]; tensor var_8677 = concat(axis = var_8676, interleave = var_8677_interleave_0, values = (var_8674, x1_67))[name = string("op_8677")]; tensor var_8678 = mul(x = var_8677, y = sin_1_cast_fp16)[name = string("op_8678")]; tensor key_states_65 = add(x = var_8653, y = var_8678)[name = string("key_states_65")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([16])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([17])]; int32 concat_130_axis_0 = const()[name = string("concat_130_axis_0"), val = int32(0)]; bool concat_130_interleave_0 = const()[name = string("concat_130_interleave_0"), val = bool(false)]; tensor concat_130 = concat(axis = concat_130_axis_0, interleave = concat_130_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_130")]; tensor concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = tensor([0])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_196, concat_131_values1_0, var_1195, concat_131_values3_0))[name = string("concat_131")]; tensor model_model_kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_130, begin_mask = model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_131, end_mask = model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_33_stride_0, update = key_states_65, x = coreml_update_state_67)[name = string("model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_32")]; tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([52])]; tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([53])]; int32 concat_134_axis_0 = const()[name = string("concat_134_axis_0"), val = int32(0)]; bool concat_134_interleave_0 = const()[name = string("concat_134_interleave_0"), val = bool(false)]; tensor concat_134 = concat(axis = concat_134_axis_0, interleave = concat_134_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_134")]; tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (expand_dims_202, concat_135_values1_0, var_1195, concat_135_values3_0))[name = string("concat_135")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_134, begin_mask = model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_135, end_mask = model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_34_stride_0, update = var_8583, x = coreml_update_state_68)[name = string("model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_33")]; tensor var_8733_begin_0 = const()[name = string("op_8733_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_8733_end_0 = const()[name = string("op_8733_end_0"), val = tensor([17, 8, 1024, 128])]; tensor var_8733_end_mask_0 = const()[name = string("op_8733_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8733_cast_fp16 = slice_by_index(begin = var_8733_begin_0, end = var_8733_end_0, end_mask = var_8733_end_mask_0, x = coreml_update_state_69)[name = string("op_8733_cast_fp16")]; tensor K_layer_cache_33_axes_0 = const()[name = string("K_layer_cache_33_axes_0"), val = tensor([0])]; tensor K_layer_cache_33_cast_fp16 = squeeze(axes = K_layer_cache_33_axes_0, x = var_8733_cast_fp16)[name = string("K_layer_cache_33_cast_fp16")]; tensor var_8740_begin_0 = const()[name = string("op_8740_begin_0"), val = tensor([52, 0, 0, 0])]; tensor var_8740_end_0 = const()[name = string("op_8740_end_0"), val = tensor([53, 8, 1024, 128])]; tensor var_8740_end_mask_0 = const()[name = string("op_8740_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8740_cast_fp16 = slice_by_index(begin = var_8740_begin_0, end = var_8740_end_0, end_mask = var_8740_end_mask_0, x = coreml_update_state_69)[name = string("op_8740_cast_fp16")]; tensor V_layer_cache_33_axes_0 = const()[name = string("V_layer_cache_33_axes_0"), val = tensor([0])]; tensor V_layer_cache_33_cast_fp16 = squeeze(axes = V_layer_cache_33_axes_0, x = var_8740_cast_fp16)[name = string("V_layer_cache_33_cast_fp16")]; tensor x_327_axes_0 = const()[name = string("x_327_axes_0"), val = tensor([1])]; tensor x_327_cast_fp16 = expand_dims(axes = x_327_axes_0, x = K_layer_cache_33_cast_fp16)[name = string("x_327_cast_fp16")]; tensor var_8777 = const()[name = string("op_8777"), val = tensor([1, 4, 1, 1])]; tensor x_329_cast_fp16 = tile(reps = var_8777, x = x_327_cast_fp16)[name = string("x_329_cast_fp16")]; tensor var_8789 = const()[name = string("op_8789"), val = tensor([1, -1, 1024, 128])]; tensor key_states_67_cast_fp16 = reshape(shape = var_8789, x = x_329_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor x_333_axes_0 = const()[name = string("x_333_axes_0"), val = tensor([1])]; tensor x_333_cast_fp16 = expand_dims(axes = x_333_axes_0, x = V_layer_cache_33_cast_fp16)[name = string("x_333_cast_fp16")]; tensor var_8797 = const()[name = string("op_8797"), val = tensor([1, 4, 1, 1])]; tensor x_335_cast_fp16 = tile(reps = var_8797, x = x_333_cast_fp16)[name = string("x_335_cast_fp16")]; tensor var_8809 = const()[name = string("op_8809"), val = tensor([1, -1, 1024, 128])]; tensor value_states_99_cast_fp16 = reshape(shape = var_8809, x = x_335_cast_fp16)[name = string("value_states_99_cast_fp16")]; bool var_8824_transpose_x_1 = const()[name = string("op_8824_transpose_x_1"), val = bool(false)]; bool var_8824_transpose_y_1 = const()[name = string("op_8824_transpose_y_1"), val = bool(true)]; tensor var_8824 = matmul(transpose_x = var_8824_transpose_x_1, transpose_y = var_8824_transpose_y_1, x = query_states_65, y = key_states_67_cast_fp16)[name = string("op_8824")]; fp16 var_8825_to_fp16 = const()[name = string("op_8825_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_97_cast_fp16 = mul(x = var_8824, y = var_8825_to_fp16)[name = string("attn_weights_97_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; int32 var_8860 = const()[name = string("op_8860"), val = int32(-1)]; tensor attn_weights_101_cast_fp16 = softmax(axis = var_8860, x = attn_weights_99_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; bool attn_output_161_transpose_x_0 = const()[name = string("attn_output_161_transpose_x_0"), val = bool(false)]; bool attn_output_161_transpose_y_0 = const()[name = string("attn_output_161_transpose_y_0"), val = bool(false)]; tensor attn_output_161_cast_fp16 = matmul(transpose_x = attn_output_161_transpose_x_0, transpose_y = attn_output_161_transpose_y_0, x = attn_weights_101_cast_fp16, y = value_states_99_cast_fp16)[name = string("attn_output_161_cast_fp16")]; tensor var_8871_perm_0 = const()[name = string("op_8871_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8875 = const()[name = string("op_8875"), val = tensor([1, 1, 4096])]; tensor var_8871_cast_fp16 = transpose(perm = var_8871_perm_0, x = attn_output_161_cast_fp16)[name = string("transpose_10")]; tensor attn_output_165_cast_fp16 = reshape(shape = var_8875, x = var_8871_cast_fp16)[name = string("attn_output_165_cast_fp16")]; tensor var_8880 = const()[name = string("op_8880"), val = tensor([0, 2, 1])]; string var_8896_pad_type_0 = const()[name = string("op_8896_pad_type_0"), val = string("valid")]; int32 var_8896_groups_0 = const()[name = string("op_8896_groups_0"), val = int32(1)]; tensor var_8896_strides_0 = const()[name = string("op_8896_strides_0"), val = tensor([1])]; tensor var_8896_pad_0 = const()[name = string("op_8896_pad_0"), val = tensor([0, 0])]; tensor var_8896_dilations_0 = const()[name = string("op_8896_dilations_0"), val = tensor([1])]; tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933787008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939029952))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8881_cast_fp16 = transpose(perm = var_8880, x = attn_output_165_cast_fp16)[name = string("transpose_9")]; tensor var_8896_cast_fp16 = conv(dilations = var_8896_dilations_0, groups = var_8896_groups_0, pad = var_8896_pad_0, pad_type = var_8896_pad_type_0, strides = var_8896_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_8881_cast_fp16)[name = string("op_8896_cast_fp16")]; tensor var_8900 = const()[name = string("op_8900"), val = tensor([0, 2, 1])]; tensor attn_output_169_cast_fp16 = transpose(perm = var_8900, x = var_8896_cast_fp16)[name = string("transpose_8")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor mean_135_axes_0 = const()[name = string("mean_135_axes_0"), val = tensor([-1])]; bool mean_135_keep_dims_0 = const()[name = string("mean_135_keep_dims_0"), val = bool(true)]; tensor mean_135_cast_fp16 = reduce_mean(axes = mean_135_axes_0, keep_dims = mean_135_keep_dims_0, x = hidden_states_101_cast_fp16)[name = string("mean_135_cast_fp16")]; tensor input_299_cast_fp16 = sub(x = hidden_states_101_cast_fp16, y = mean_135_cast_fp16)[name = string("input_299_cast_fp16")]; tensor var_8919_axes_0 = const()[name = string("op_8919_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939040256)))]; fp16 var_8907_to_fp16 = const()[name = string("op_8907_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8919_cast_fp16 = layer_norm(axes = var_8919_axes_0, epsilon = var_8907_to_fp16, gamma = model_model_layers_16_post_attention_layernorm_weight_to_fp16, x = input_299_cast_fp16)[name = string("op_8919_cast_fp16")]; tensor var_8933 = const()[name = string("op_8933"), val = tensor([0, 2, 1])]; tensor input_301_axes_0 = const()[name = string("input_301_axes_0"), val = tensor([2])]; tensor var_8934 = transpose(perm = var_8933, x = var_8919_cast_fp16)[name = string("transpose_7")]; tensor input_301 = expand_dims(axes = input_301_axes_0, x = var_8934)[name = string("input_301")]; string input_303_pad_type_0 = const()[name = string("input_303_pad_type_0"), val = string("valid")]; tensor input_303_strides_0 = const()[name = string("input_303_strides_0"), val = tensor([1, 1])]; tensor input_303_pad_0 = const()[name = string("input_303_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_303_dilations_0 = const()[name = string("input_303_dilations_0"), val = tensor([1, 1])]; int32 input_303_groups_0 = const()[name = string("input_303_groups_0"), val = int32(1)]; tensor input_303 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_301)[name = string("input_303")]; string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; tensor b_33 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_301)[name = string("b_33")]; tensor c_33 = silu(x = input_303)[name = string("c_33")]; tensor input_305 = mul(x = c_33, y = b_33)[name = string("input_305")]; string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; tensor e_33 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_305)[name = string("e_33")]; tensor var_8956_axes_0 = const()[name = string("op_8956_axes_0"), val = tensor([2])]; tensor var_8956 = squeeze(axes = var_8956_axes_0, x = e_33)[name = string("op_8956")]; tensor var_8957 = const()[name = string("op_8957"), val = tensor([0, 2, 1])]; tensor var_8958 = transpose(perm = var_8957, x = var_8956)[name = string("transpose_6")]; tensor hidden_states_103_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = var_8958)[name = string("hidden_states_103_cast_fp16")]; tensor mean_137_axes_0 = const()[name = string("mean_137_axes_0"), val = tensor([-1])]; bool mean_137_keep_dims_0 = const()[name = string("mean_137_keep_dims_0"), val = bool(true)]; tensor mean_137_cast_fp16 = reduce_mean(axes = mean_137_axes_0, keep_dims = mean_137_keep_dims_0, x = hidden_states_103_cast_fp16)[name = string("mean_137_cast_fp16")]; tensor input_307_cast_fp16 = sub(x = hidden_states_103_cast_fp16, y = mean_137_cast_fp16)[name = string("input_307_cast_fp16")]; tensor var_8976_axes_0 = const()[name = string("op_8976_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939045440)))]; fp16 var_8964_to_fp16 = const()[name = string("op_8964_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8976_cast_fp16 = layer_norm(axes = var_8976_axes_0, epsilon = var_8964_to_fp16, gamma = model_model_layers_17_input_layernorm_weight_to_fp16, x = input_307_cast_fp16)[name = string("op_8976_cast_fp16")]; tensor var_8982 = const()[name = string("op_8982"), val = tensor([0, 2, 1])]; tensor var_8985_axes_0 = const()[name = string("op_8985_axes_0"), val = tensor([2])]; tensor var_8983 = transpose(perm = var_8982, x = var_8976_cast_fp16)[name = string("transpose_5")]; tensor var_8985 = expand_dims(axes = var_8985_axes_0, x = var_8983)[name = string("op_8985")]; string var_9001_pad_type_0 = const()[name = string("op_9001_pad_type_0"), val = string("valid")]; tensor var_9001_strides_0 = const()[name = string("op_9001_strides_0"), val = tensor([1, 1])]; tensor var_9001_pad_0 = const()[name = string("op_9001_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9001_dilations_0 = const()[name = string("op_9001_dilations_0"), val = tensor([1, 1])]; int32 var_9001_groups_0 = const()[name = string("op_9001_groups_0"), val = int32(1)]; tensor var_9001 = conv(dilations = var_9001_dilations_0, groups = var_9001_groups_0, pad = var_9001_pad_0, pad_type = var_9001_pad_type_0, strides = var_9001_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_8985)[name = string("op_9001")]; tensor var_9006 = const()[name = string("op_9006"), val = tensor([1, 32, 1, 128])]; tensor var_9007 = reshape(shape = var_9006, x = var_9001)[name = string("op_9007")]; string var_9023_pad_type_0 = const()[name = string("op_9023_pad_type_0"), val = string("valid")]; tensor var_9023_strides_0 = const()[name = string("op_9023_strides_0"), val = tensor([1, 1])]; tensor var_9023_pad_0 = const()[name = string("op_9023_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9023_dilations_0 = const()[name = string("op_9023_dilations_0"), val = tensor([1, 1])]; int32 var_9023_groups_0 = const()[name = string("op_9023_groups_0"), val = int32(1)]; tensor var_9023 = conv(dilations = var_9023_dilations_0, groups = var_9023_groups_0, pad = var_9023_pad_0, pad_type = var_9023_pad_type_0, strides = var_9023_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_8985)[name = string("op_9023")]; tensor var_9028 = const()[name = string("op_9028"), val = tensor([1, 8, 1, 128])]; tensor var_9029 = reshape(shape = var_9028, x = var_9023)[name = string("op_9029")]; string var_9045_pad_type_0 = const()[name = string("op_9045_pad_type_0"), val = string("valid")]; tensor var_9045_strides_0 = const()[name = string("op_9045_strides_0"), val = tensor([1, 1])]; tensor var_9045_pad_0 = const()[name = string("op_9045_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9045_dilations_0 = const()[name = string("op_9045_dilations_0"), val = tensor([1, 1])]; int32 var_9045_groups_0 = const()[name = string("op_9045_groups_0"), val = int32(1)]; tensor var_9045 = conv(dilations = var_9045_dilations_0, groups = var_9045_groups_0, pad = var_9045_pad_0, pad_type = var_9045_pad_type_0, strides = var_9045_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_8985)[name = string("op_9045")]; tensor var_9050 = const()[name = string("op_9050"), val = tensor([1, 8, 1, 128])]; tensor var_9051 = reshape(shape = var_9050, x = var_9045)[name = string("op_9051")]; tensor mean_139_axes_0 = const()[name = string("mean_139_axes_0"), val = tensor([-1])]; bool mean_139_keep_dims_0 = const()[name = string("mean_139_keep_dims_0"), val = bool(true)]; tensor mean_139 = reduce_mean(axes = mean_139_axes_0, keep_dims = mean_139_keep_dims_0, x = var_9007)[name = string("mean_139")]; tensor input_311 = sub(x = var_9007, y = mean_139)[name = string("input_311")]; tensor var_9072_axes_0 = const()[name = string("op_9072_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939050624)))]; fp16 var_9060_to_fp16 = const()[name = string("op_9060_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9072_cast_fp16 = layer_norm(axes = var_9072_axes_0, epsilon = var_9060_to_fp16, gamma = model_model_layers_17_self_attn_q_norm_weight_to_fp16, x = input_311)[name = string("op_9072_cast_fp16")]; tensor mean_141_axes_0 = const()[name = string("mean_141_axes_0"), val = tensor([-1])]; bool mean_141_keep_dims_0 = const()[name = string("mean_141_keep_dims_0"), val = bool(true)]; tensor mean_141 = reduce_mean(axes = mean_141_axes_0, keep_dims = mean_141_keep_dims_0, x = var_9029)[name = string("mean_141")]; tensor input_313 = sub(x = var_9029, y = mean_141)[name = string("input_313")]; tensor var_9090_axes_0 = const()[name = string("op_9090_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939050944)))]; fp16 var_9078_to_fp16 = const()[name = string("op_9078_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9090_cast_fp16 = layer_norm(axes = var_9090_axes_0, epsilon = var_9078_to_fp16, gamma = model_model_layers_17_self_attn_k_norm_weight_to_fp16, x = input_313)[name = string("op_9090_cast_fp16")]; tensor var_9093 = mul(x = var_9072_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9093")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = var_9072_cast_fp16)[name = string("x1_69")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = var_9072_cast_fp16)[name = string("x2_69")]; fp16 const_311_promoted = const()[name = string("const_311_promoted"), val = fp16(-0x1p+0)]; tensor var_9114 = mul(x = x2_69, y = const_311_promoted)[name = string("op_9114")]; int32 var_9116 = const()[name = string("op_9116"), val = int32(-1)]; bool var_9117_interleave_0 = const()[name = string("op_9117_interleave_0"), val = bool(false)]; tensor var_9117 = concat(axis = var_9116, interleave = var_9117_interleave_0, values = (var_9114, x1_69))[name = string("op_9117")]; tensor var_9118 = mul(x = var_9117, y = sin_1_cast_fp16)[name = string("op_9118")]; tensor query_states_69 = add(x = var_9093, y = var_9118)[name = string("query_states_69")]; tensor var_9121 = mul(x = var_9090_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9121")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_9090_cast_fp16)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_9090_cast_fp16)[name = string("x2")]; fp16 const_314_promoted = const()[name = string("const_314_promoted"), val = fp16(-0x1p+0)]; tensor var_9142 = mul(x = x2, y = const_314_promoted)[name = string("op_9142")]; int32 var_9144 = const()[name = string("op_9144"), val = int32(-1)]; bool var_9145_interleave_0 = const()[name = string("op_9145_interleave_0"), val = bool(false)]; tensor var_9145 = concat(axis = var_9144, interleave = var_9145_interleave_0, values = (var_9142, x1))[name = string("op_9145")]; tensor var_9146 = mul(x = var_9145, y = sin_1_cast_fp16)[name = string("op_9146")]; tensor key_states_69 = add(x = var_9121, y = var_9146)[name = string("key_states_69")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([17])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([18])]; int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (expand_dims_204, expand_dims_205, current_pos, expand_dims_207))[name = string("concat_138")]; tensor concat_139_values1_0 = const()[name = string("concat_139_values1_0"), val = tensor([0])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_208, concat_139_values1_0, var_1195, concat_139_values3_0))[name = string("concat_139")]; tensor model_model_kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_138, begin_mask = model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_139, end_mask = model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_35_stride_0, update = key_states_69, x = coreml_update_state_69)[name = string("model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_34")]; tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([53])]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([0])]; tensor expand_dims_213 = const()[name = string("expand_dims_213"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([54])]; int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (expand_dims_210, expand_dims_211, current_pos, expand_dims_213))[name = string("concat_142")]; tensor concat_143_values1_0 = const()[name = string("concat_143_values1_0"), val = tensor([0])]; tensor concat_143_values3_0 = const()[name = string("concat_143_values3_0"), val = tensor([0])]; int32 concat_143_axis_0 = const()[name = string("concat_143_axis_0"), val = int32(0)]; bool concat_143_interleave_0 = const()[name = string("concat_143_interleave_0"), val = bool(false)]; tensor concat_143 = concat(axis = concat_143_axis_0, interleave = concat_143_interleave_0, values = (expand_dims_214, concat_143_values1_0, var_1195, concat_143_values3_0))[name = string("concat_143")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_142, begin_mask = model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_143, end_mask = model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_36_stride_0, update = var_9051, x = coreml_update_state_70)[name = string("model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_35")]; tensor var_9201_begin_0 = const()[name = string("op_9201_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_9201_end_0 = const()[name = string("op_9201_end_0"), val = tensor([18, 8, 1024, 128])]; tensor var_9201_end_mask_0 = const()[name = string("op_9201_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9201_cast_fp16 = slice_by_index(begin = var_9201_begin_0, end = var_9201_end_0, end_mask = var_9201_end_mask_0, x = coreml_update_state_71)[name = string("op_9201_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_9201_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_9208_begin_0 = const()[name = string("op_9208_begin_0"), val = tensor([53, 0, 0, 0])]; tensor var_9208_end_0 = const()[name = string("op_9208_end_0"), val = tensor([54, 8, 1024, 128])]; tensor var_9208_end_mask_0 = const()[name = string("op_9208_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9208_cast_fp16 = slice_by_index(begin = var_9208_begin_0, end = var_9208_end_0, end_mask = var_9208_end_mask_0, x = coreml_update_state_71)[name = string("op_9208_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_9208_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_347_axes_0 = const()[name = string("x_347_axes_0"), val = tensor([1])]; tensor x_347_cast_fp16 = expand_dims(axes = x_347_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_347_cast_fp16")]; tensor var_9245 = const()[name = string("op_9245"), val = tensor([1, 4, 1, 1])]; tensor x_349_cast_fp16 = tile(reps = var_9245, x = x_347_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_9257 = const()[name = string("op_9257"), val = tensor([1, -1, 1024, 128])]; tensor key_states_cast_fp16 = reshape(shape = var_9257, x = x_349_cast_fp16)[name = string("key_states_cast_fp16")]; tensor x_353_axes_0 = const()[name = string("x_353_axes_0"), val = tensor([1])]; tensor x_353_cast_fp16 = expand_dims(axes = x_353_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_353_cast_fp16")]; tensor var_9265 = const()[name = string("op_9265"), val = tensor([1, 4, 1, 1])]; tensor x_355_cast_fp16 = tile(reps = var_9265, x = x_353_cast_fp16)[name = string("x_355_cast_fp16")]; tensor var_9277 = const()[name = string("op_9277"), val = tensor([1, -1, 1024, 128])]; tensor value_states_105_cast_fp16 = reshape(shape = var_9277, x = x_355_cast_fp16)[name = string("value_states_105_cast_fp16")]; bool var_9292_transpose_x_1 = const()[name = string("op_9292_transpose_x_1"), val = bool(false)]; bool var_9292_transpose_y_1 = const()[name = string("op_9292_transpose_y_1"), val = bool(true)]; tensor var_9292 = matmul(transpose_x = var_9292_transpose_x_1, transpose_y = var_9292_transpose_y_1, x = query_states_69, y = key_states_cast_fp16)[name = string("op_9292")]; fp16 var_9293_to_fp16 = const()[name = string("op_9293_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_103_cast_fp16 = mul(x = var_9292, y = var_9293_to_fp16)[name = string("attn_weights_103_cast_fp16")]; tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; int32 var_9328 = const()[name = string("op_9328"), val = int32(-1)]; tensor attn_weights_cast_fp16 = softmax(axis = var_9328, x = attn_weights_105_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_171_transpose_x_0 = const()[name = string("attn_output_171_transpose_x_0"), val = bool(false)]; bool attn_output_171_transpose_y_0 = const()[name = string("attn_output_171_transpose_y_0"), val = bool(false)]; tensor attn_output_171_cast_fp16 = matmul(transpose_x = attn_output_171_transpose_x_0, transpose_y = attn_output_171_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_105_cast_fp16)[name = string("attn_output_171_cast_fp16")]; tensor var_9339_perm_0 = const()[name = string("op_9339_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9343 = const()[name = string("op_9343"), val = tensor([1, 1, 4096])]; tensor var_9339_cast_fp16 = transpose(perm = var_9339_perm_0, x = attn_output_171_cast_fp16)[name = string("transpose_4")]; tensor attn_output_175_cast_fp16 = reshape(shape = var_9343, x = var_9339_cast_fp16)[name = string("attn_output_175_cast_fp16")]; tensor var_9348 = const()[name = string("op_9348"), val = tensor([0, 2, 1])]; string var_9364_pad_type_0 = const()[name = string("op_9364_pad_type_0"), val = string("valid")]; int32 var_9364_groups_0 = const()[name = string("op_9364_groups_0"), val = int32(1)]; tensor var_9364_strides_0 = const()[name = string("op_9364_strides_0"), val = tensor([1])]; tensor var_9364_pad_0 = const()[name = string("op_9364_pad_0"), val = tensor([0, 0])]; tensor var_9364_dilations_0 = const()[name = string("op_9364_dilations_0"), val = tensor([1])]; tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939051264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944294208))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9349_cast_fp16 = transpose(perm = var_9348, x = attn_output_175_cast_fp16)[name = string("transpose_3")]; tensor var_9364_cast_fp16 = conv(dilations = var_9364_dilations_0, groups = var_9364_groups_0, pad = var_9364_pad_0, pad_type = var_9364_pad_type_0, strides = var_9364_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_9349_cast_fp16)[name = string("op_9364_cast_fp16")]; tensor var_9368 = const()[name = string("op_9368"), val = tensor([0, 2, 1])]; tensor attn_output_cast_fp16 = transpose(perm = var_9368, x = var_9364_cast_fp16)[name = string("transpose_2")]; tensor hidden_states_107_cast_fp16 = add(x = hidden_states_103_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor mean_143_axes_0 = const()[name = string("mean_143_axes_0"), val = tensor([-1])]; bool mean_143_keep_dims_0 = const()[name = string("mean_143_keep_dims_0"), val = bool(true)]; tensor mean_143_cast_fp16 = reduce_mean(axes = mean_143_axes_0, keep_dims = mean_143_keep_dims_0, x = hidden_states_107_cast_fp16)[name = string("mean_143_cast_fp16")]; tensor input_317_cast_fp16 = sub(x = hidden_states_107_cast_fp16, y = mean_143_cast_fp16)[name = string("input_317_cast_fp16")]; tensor var_9387_axes_0 = const()[name = string("op_9387_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944304512)))]; fp16 var_9375_to_fp16 = const()[name = string("op_9375_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9387_cast_fp16 = layer_norm(axes = var_9387_axes_0, epsilon = var_9375_to_fp16, gamma = model_model_layers_17_post_attention_layernorm_weight_to_fp16, x = input_317_cast_fp16)[name = string("op_9387_cast_fp16")]; tensor var_9401 = const()[name = string("op_9401"), val = tensor([0, 2, 1])]; tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; tensor var_9402 = transpose(perm = var_9401, x = var_9387_cast_fp16)[name = string("transpose_1")]; tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_9402)[name = string("input_319")]; string input_321_pad_type_0 = const()[name = string("input_321_pad_type_0"), val = string("valid")]; tensor input_321_strides_0 = const()[name = string("input_321_strides_0"), val = tensor([1, 1])]; tensor input_321_pad_0 = const()[name = string("input_321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_321_dilations_0 = const()[name = string("input_321_dilations_0"), val = tensor([1, 1])]; int32 input_321_groups_0 = const()[name = string("input_321_groups_0"), val = int32(1)]; tensor input_321 = conv(dilations = input_321_dilations_0, groups = input_321_groups_0, pad = input_321_pad_0, pad_type = input_321_pad_type_0, strides = input_321_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_319)[name = string("input_321")]; string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_319)[name = string("b")]; tensor c = silu(x = input_321)[name = string("c")]; tensor input_323 = mul(x = c, y = b)[name = string("input_323")]; string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; tensor e = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input_323)[name = string("e")]; tensor var_9424_axes_0 = const()[name = string("op_9424_axes_0"), val = tensor([2])]; tensor var_9424 = squeeze(axes = var_9424_axes_0, x = e)[name = string("op_9424")]; tensor var_9425 = const()[name = string("op_9425"), val = tensor([0, 2, 1])]; tensor var_9426 = transpose(perm = var_9425, x = var_9424)[name = string("transpose_0")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = var_9426)[name = string("hidden_states_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_cast_fp16 = sub(x = hidden_states_cast_fp16, y = mean_cast_fp16)[name = string("input_cast_fp16")]; tensor var_9444_axes_0 = const()[name = string("op_9444_axes_0"), val = tensor([-1])]; tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944309696)))]; fp16 var_9432_to_fp16 = const()[name = string("op_9432_to_fp16"), val = fp16(0x1.1p-20)]; tensor output_hidden_states = layer_norm(axes = var_9444_axes_0, epsilon = var_9432_to_fp16, gamma = model_model_norm_weight_to_fp16, x = input_cast_fp16)[name = string("op_9444_cast_fp16")]; tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; } -> (output_hidden_states); func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5243008))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5259456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6570240))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6574400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7885184))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7889344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20341248))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20380224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32832128))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32871104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45323008))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45333312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50576256))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50592704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51903488))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51907648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53218432))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53222592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65674496))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65713472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78165376))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78204352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90656256))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90666560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95909504))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95925952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97236736))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97240896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98551680))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98555840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111007744))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111046720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123498624))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123537600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135989504))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135999808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141242752))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141259200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142569984))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142574144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143884928))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143889088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156340992))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156379968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168831872))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168870848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181322752))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181333056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186576000))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186592448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187903232))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187907392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189218176))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189222336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201674240))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201713216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214165120))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214204096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226656000))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226666304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231909248))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231925696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233236480))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233240640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234551424))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234555584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247007488))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247046464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259498368))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259537344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271989248))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271999552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277242496))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277258944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278569728))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278573888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279884672))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279888832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292340736))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292379712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304831616))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304870592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317322496))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317332800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322575744))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322592192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323902976))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323907136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325217920))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325222080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337673984))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337712960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350164864))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350203840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362655744))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362666048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367908992))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367925440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369236224))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369240384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370551168))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370555328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383007232))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383046208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395498112))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395537088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407988992))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407999296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413242240))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413258688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414569472))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414573632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415884416))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415888576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428340480))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428379456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440831360))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440870336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453322240))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453332544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458575488))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458591936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459902720))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459906880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461217664))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461221824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473673728))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473712704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486164608))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486203584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498655488))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498665792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503908736))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503925184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505235968))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505240128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506550912))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506555072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519006976))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519045952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531497856))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531536832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543988736))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543999040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549241984))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549258432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550569216))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550573376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551884160))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551888320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564340224))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564379200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576831104))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576870080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589321984))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589332288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594575232))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594591680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595902464))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595906624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597217408))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597221568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609673472))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609712448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622164352))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622203328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634655232))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634665536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639908480))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639924928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641235712))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641239872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642550656))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642554816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655006720))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655045696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667497600))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667536576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679988480))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685241728))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685258176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686568960))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686573120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687883904))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687888064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700339968))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700378944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712830848))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712869824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725321728))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725332032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730574976))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730591424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731902208))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731906368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(733217152))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(733221312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745673216))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745712192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758164096))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")]; tensor model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758203072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770654976))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770665280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775908224))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775924672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(777235456))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(777239616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778550400))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778554560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791006464))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791045440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803497344))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")]; tensor model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803536320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815988224))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(65536)]; tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_975_axis_0 = const()[name = string("op_975_axis_0"), val = int32(1)]; int32 var_975_batch_dims_0 = const()[name = string("op_975_batch_dims_0"), val = int32(0)]; bool var_975_validate_indices_0 = const()[name = string("op_975_validate_indices_0"), val = bool(false)]; tensor var_967_to_fp16 = const()[name = string("op_967_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832775808)))]; tensor var_975_cast_fp16 = gather(axis = var_975_axis_0, batch_dims = var_975_batch_dims_0, indices = select_0, validate_indices = var_975_validate_indices_0, x = var_967_to_fp16)[name = string("op_975_cast_fp16")]; tensor var_979 = const()[name = string("op_979"), val = tensor([1, 64, 1, 128])]; tensor cos_1_cast_fp16 = reshape(shape = var_979, x = var_975_cast_fp16)[name = string("cos_1_cast_fp16")]; int32 var_989_axis_0 = const()[name = string("op_989_axis_0"), val = int32(1)]; int32 var_989_batch_dims_0 = const()[name = string("op_989_batch_dims_0"), val = int32(0)]; bool var_989_validate_indices_0 = const()[name = string("op_989_validate_indices_0"), val = bool(false)]; tensor var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815998528)))]; tensor var_989_cast_fp16 = gather(axis = var_989_axis_0, batch_dims = var_989_batch_dims_0, indices = select_0, validate_indices = var_989_validate_indices_0, x = var_981_to_fp16)[name = string("op_989_cast_fp16")]; tensor var_993 = const()[name = string("op_993"), val = tensor([1, 64, 1, 128])]; tensor sin_1_cast_fp16 = reshape(shape = var_993, x = var_989_cast_fp16)[name = string("sin_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_1020_axes_0 = const()[name = string("op_1020_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849553088)))]; fp16 var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1020_cast_fp16 = layer_norm(axes = var_1020_axes_0, epsilon = var_1008_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1032 = const()[name = string("op_1032"), val = tensor([0, 2, 1])]; tensor var_1035_axes_0 = const()[name = string("op_1035_axes_0"), val = tensor([2])]; tensor var_1033 = transpose(perm = var_1032, x = var_1020_cast_fp16)[name = string("transpose_163")]; tensor var_1035 = expand_dims(axes = var_1035_axes_0, x = var_1033)[name = string("op_1035")]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1035)[name = string("query_states_1")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1035)[name = string("key_states_1")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1035)[name = string("value_states_1")]; tensor var_1077 = const()[name = string("op_1077"), val = tensor([1, 32, 128, 64])]; tensor var_1078 = reshape(shape = var_1077, x = query_states_1)[name = string("op_1078")]; tensor var_1083 = const()[name = string("op_1083"), val = tensor([0, 1, 3, 2])]; tensor var_1088 = const()[name = string("op_1088"), val = tensor([1, 8, 128, 64])]; tensor var_1089 = reshape(shape = var_1088, x = key_states_1)[name = string("op_1089")]; tensor var_1094 = const()[name = string("op_1094"), val = tensor([0, 1, 3, 2])]; tensor var_1099 = const()[name = string("op_1099"), val = tensor([1, 8, 128, 64])]; tensor var_1100 = reshape(shape = var_1099, x = value_states_1)[name = string("op_1100")]; tensor var_1105 = const()[name = string("op_1105"), val = tensor([0, 1, 3, 2])]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor x_1 = transpose(perm = var_1083, x = var_1078)[name = string("transpose_162")]; tensor mean_3 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = x_1)[name = string("mean_3")]; tensor input_5 = sub(x = x_1, y = mean_3)[name = string("input_5")]; tensor var_1122_axes_0 = const()[name = string("op_1122_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558272)))]; fp16 var_1110_to_fp16 = const()[name = string("op_1110_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1122_cast_fp16 = layer_norm(axes = var_1122_axes_0, epsilon = var_1110_to_fp16, gamma = model_model_layers_0_self_attn_q_norm_weight_to_fp16, x = input_5)[name = string("op_1122_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor x_3 = transpose(perm = var_1094, x = var_1089)[name = string("transpose_161")]; tensor mean_5 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = x_3)[name = string("mean_5")]; tensor input_7 = sub(x = x_3, y = mean_5)[name = string("input_7")]; tensor var_1140_axes_0 = const()[name = string("op_1140_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558592)))]; fp16 var_1128_to_fp16 = const()[name = string("op_1128_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1140_cast_fp16 = layer_norm(axes = var_1140_axes_0, epsilon = var_1128_to_fp16, gamma = model_model_layers_0_self_attn_k_norm_weight_to_fp16, x = input_7)[name = string("op_1140_cast_fp16")]; tensor var_1147 = const()[name = string("op_1147"), val = tensor([0, 2, 1, 3])]; tensor var_1153 = const()[name = string("op_1153"), val = tensor([0, 2, 1, 3])]; tensor cos_5 = transpose(perm = var_1147, x = cos_1_cast_fp16)[name = string("transpose_160")]; tensor var_1155 = mul(x = var_1122_cast_fp16, y = cos_5)[name = string("op_1155")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_1122_cast_fp16)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_1122_cast_fp16)[name = string("x2_1")]; fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)]; tensor var_1176 = mul(x = x2_1, y = const_7_promoted)[name = string("op_1176")]; int32 var_1178 = const()[name = string("op_1178"), val = int32(-1)]; bool var_1179_interleave_0 = const()[name = string("op_1179_interleave_0"), val = bool(false)]; tensor var_1179 = concat(axis = var_1178, interleave = var_1179_interleave_0, values = (var_1176, x1_1))[name = string("op_1179")]; tensor sin_5 = transpose(perm = var_1153, x = sin_1_cast_fp16)[name = string("transpose_159")]; tensor var_1180 = mul(x = var_1179, y = sin_5)[name = string("op_1180")]; tensor query_states_3 = add(x = var_1155, y = var_1180)[name = string("query_states_3")]; tensor var_1183 = mul(x = var_1140_cast_fp16, y = cos_5)[name = string("op_1183")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_1140_cast_fp16)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_1140_cast_fp16)[name = string("x2_3")]; fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; tensor var_1204 = mul(x = x2_3, y = const_10_promoted)[name = string("op_1204")]; int32 var_1206 = const()[name = string("op_1206"), val = int32(-1)]; bool var_1207_interleave_0 = const()[name = string("op_1207_interleave_0"), val = bool(false)]; tensor var_1207 = concat(axis = var_1206, interleave = var_1207_interleave_0, values = (var_1204, x1_3))[name = string("op_1207")]; tensor var_1208 = mul(x = var_1207, y = sin_5)[name = string("op_1208")]; tensor key_states_3 = add(x = var_1183, y = var_1208)[name = string("key_states_3")]; tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; tensor var_1230 = add(x = current_pos, y = seq_length_1)[name = string("op_1230")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1230, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = key_states_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_36")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([36])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([37])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1230, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3 = transpose(perm = var_1105, x = var_1100)[name = string("transpose_158")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_36)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_37")]; tensor var_1279_begin_0 = const()[name = string("op_1279_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1279_end_0 = const()[name = string("op_1279_end_0"), val = tensor([1, 8, 1024, 128])]; tensor var_1279_end_mask_0 = const()[name = string("op_1279_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1279_cast_fp16 = slice_by_index(begin = var_1279_begin_0, end = var_1279_end_0, end_mask = var_1279_end_mask_0, x = coreml_update_state_37)[name = string("op_1279_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_1279_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_1286_begin_0 = const()[name = string("op_1286_begin_0"), val = tensor([36, 0, 0, 0])]; tensor var_1286_end_0 = const()[name = string("op_1286_end_0"), val = tensor([37, 8, 1024, 128])]; tensor var_1286_end_mask_0 = const()[name = string("op_1286_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1286_cast_fp16 = slice_by_index(begin = var_1286_begin_0, end = var_1286_end_0, end_mask = var_1286_end_mask_0, x = coreml_update_state_37)[name = string("op_1286_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_1286_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_7_axes_0 = const()[name = string("x_7_axes_0"), val = tensor([1])]; tensor x_7_cast_fp16 = expand_dims(axes = x_7_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_1315 = const()[name = string("op_1315"), val = tensor([1, 4, 1, 1])]; tensor x_9_cast_fp16 = tile(reps = var_1315, x = x_7_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_1327 = const()[name = string("op_1327"), val = tensor([1, -1, 1024, 128])]; tensor key_states_7_cast_fp16 = reshape(shape = var_1327, x = x_9_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_13_axes_0 = const()[name = string("x_13_axes_0"), val = tensor([1])]; tensor x_13_cast_fp16 = expand_dims(axes = x_13_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_1335 = const()[name = string("op_1335"), val = tensor([1, 4, 1, 1])]; tensor x_15_cast_fp16 = tile(reps = var_1335, x = x_13_cast_fp16)[name = string("x_15_cast_fp16")]; bool var_1362_transpose_x_0 = const()[name = string("op_1362_transpose_x_0"), val = bool(false)]; bool var_1362_transpose_y_0 = const()[name = string("op_1362_transpose_y_0"), val = bool(true)]; tensor var_1362 = matmul(transpose_x = var_1362_transpose_x_0, transpose_y = var_1362_transpose_y_0, x = query_states_3, y = key_states_7_cast_fp16)[name = string("op_1362")]; fp16 var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_1362, y = var_1363_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_1398 = const()[name = string("op_1398"), val = int32(-1)]; tensor var_1400_cast_fp16 = softmax(axis = var_1398, x = attn_weights_3_cast_fp16)[name = string("op_1400_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([32, 64, 1024])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_1400_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([32, 1024, 128])]; tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_15_cast_fp16)[name = string("reshape_1_cast_fp16")]; bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 32, 64, 128])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor var_1412_perm_0 = const()[name = string("op_1412_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1431 = const()[name = string("op_1431"), val = tensor([1, 64, 4096])]; tensor var_1412_cast_fp16 = transpose(perm = var_1412_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_157")]; tensor attn_output_5_cast_fp16 = reshape(shape = var_1431, x = var_1412_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_1436 = const()[name = string("op_1436"), val = tensor([0, 2, 1])]; string var_1452_pad_type_0 = const()[name = string("op_1452_pad_type_0"), val = string("valid")]; int32 var_1452_groups_0 = const()[name = string("op_1452_groups_0"), val = int32(1)]; tensor var_1452_strides_0 = const()[name = string("op_1452_strides_0"), val = tensor([1])]; tensor var_1452_pad_0 = const()[name = string("op_1452_pad_0"), val = tensor([0, 0])]; tensor var_1452_dilations_0 = const()[name = string("op_1452_dilations_0"), val = tensor([1])]; tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854801856))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1437_cast_fp16 = transpose(perm = var_1436, x = attn_output_5_cast_fp16)[name = string("transpose_156")]; tensor var_1452_cast_fp16 = conv(dilations = var_1452_dilations_0, groups = var_1452_groups_0, pad = var_1452_pad_0, pad_type = var_1452_pad_type_0, strides = var_1452_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1437_cast_fp16)[name = string("op_1452_cast_fp16")]; tensor var_1456 = const()[name = string("op_1456"), val = tensor([0, 2, 1])]; tensor attn_output_9_cast_fp16 = transpose(perm = var_1456, x = var_1452_cast_fp16)[name = string("transpose_155")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_11_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_7_cast_fp16)[name = string("input_11_cast_fp16")]; tensor var_1475_axes_0 = const()[name = string("op_1475_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854812160)))]; fp16 var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1475_cast_fp16 = layer_norm(axes = var_1475_axes_0, epsilon = var_1463_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("op_1475_cast_fp16")]; tensor var_1489 = const()[name = string("op_1489"), val = tensor([0, 2, 1])]; tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; tensor var_1490 = transpose(perm = var_1489, x = var_1475_cast_fp16)[name = string("transpose_154")]; tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_1490)[name = string("input_13")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor input_15 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("input_15")]; string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("b_1")]; tensor c_1 = silu(x = input_15)[name = string("c_1")]; tensor input_17 = mul(x = c_1, y = b_1)[name = string("input_17")]; string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; tensor e_1 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_17)[name = string("e_1")]; tensor var_1512_axes_0 = const()[name = string("op_1512_axes_0"), val = tensor([2])]; tensor var_1512 = squeeze(axes = var_1512_axes_0, x = e_1)[name = string("op_1512")]; tensor var_1513 = const()[name = string("op_1513"), val = tensor([0, 2, 1])]; tensor var_1514 = transpose(perm = var_1513, x = var_1512)[name = string("transpose_153")]; tensor hidden_states_7_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_1514)[name = string("hidden_states_7_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_7_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_19_cast_fp16 = sub(x = hidden_states_7_cast_fp16, y = mean_9_cast_fp16)[name = string("input_19_cast_fp16")]; tensor var_1532_axes_0 = const()[name = string("op_1532_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854817344)))]; fp16 var_1520_to_fp16 = const()[name = string("op_1520_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1532_cast_fp16 = layer_norm(axes = var_1532_axes_0, epsilon = var_1520_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_19_cast_fp16)[name = string("op_1532_cast_fp16")]; tensor var_1544 = const()[name = string("op_1544"), val = tensor([0, 2, 1])]; tensor var_1547_axes_0 = const()[name = string("op_1547_axes_0"), val = tensor([2])]; tensor var_1545 = transpose(perm = var_1544, x = var_1532_cast_fp16)[name = string("transpose_152")]; tensor var_1547 = expand_dims(axes = var_1547_axes_0, x = var_1545)[name = string("op_1547")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_1547)[name = string("query_states_9")]; string key_states_11_pad_type_0 = const()[name = string("key_states_11_pad_type_0"), val = string("valid")]; tensor key_states_11_strides_0 = const()[name = string("key_states_11_strides_0"), val = tensor([1, 1])]; tensor key_states_11_pad_0 = const()[name = string("key_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_11_dilations_0 = const()[name = string("key_states_11_dilations_0"), val = tensor([1, 1])]; int32 key_states_11_groups_0 = const()[name = string("key_states_11_groups_0"), val = int32(1)]; tensor key_states_11 = conv(dilations = key_states_11_dilations_0, groups = key_states_11_groups_0, pad = key_states_11_pad_0, pad_type = key_states_11_pad_type_0, strides = key_states_11_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_1547)[name = string("key_states_11")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor value_states_9 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_1547)[name = string("value_states_9")]; tensor var_1589 = const()[name = string("op_1589"), val = tensor([1, 32, 128, 64])]; tensor var_1590 = reshape(shape = var_1589, x = query_states_9)[name = string("op_1590")]; tensor var_1595 = const()[name = string("op_1595"), val = tensor([0, 1, 3, 2])]; tensor var_1600 = const()[name = string("op_1600"), val = tensor([1, 8, 128, 64])]; tensor var_1601 = reshape(shape = var_1600, x = key_states_11)[name = string("op_1601")]; tensor var_1606 = const()[name = string("op_1606"), val = tensor([0, 1, 3, 2])]; tensor var_1611 = const()[name = string("op_1611"), val = tensor([1, 8, 128, 64])]; tensor var_1612 = reshape(shape = var_1611, x = value_states_9)[name = string("op_1612")]; tensor var_1617 = const()[name = string("op_1617"), val = tensor([0, 1, 3, 2])]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor x_21 = transpose(perm = var_1595, x = var_1590)[name = string("transpose_151")]; tensor mean_11 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = x_21)[name = string("mean_11")]; tensor input_23 = sub(x = x_21, y = mean_11)[name = string("input_23")]; tensor var_1634_axes_0 = const()[name = string("op_1634_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854822528)))]; fp16 var_1622_to_fp16 = const()[name = string("op_1622_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1634_cast_fp16 = layer_norm(axes = var_1634_axes_0, epsilon = var_1622_to_fp16, gamma = model_model_layers_1_self_attn_q_norm_weight_to_fp16, x = input_23)[name = string("op_1634_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor x_23 = transpose(perm = var_1606, x = var_1601)[name = string("transpose_150")]; tensor mean_13 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = x_23)[name = string("mean_13")]; tensor input_25 = sub(x = x_23, y = mean_13)[name = string("input_25")]; tensor var_1652_axes_0 = const()[name = string("op_1652_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854822848)))]; fp16 var_1640_to_fp16 = const()[name = string("op_1640_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1652_cast_fp16 = layer_norm(axes = var_1652_axes_0, epsilon = var_1640_to_fp16, gamma = model_model_layers_1_self_attn_k_norm_weight_to_fp16, x = input_25)[name = string("op_1652_cast_fp16")]; tensor var_1667 = mul(x = var_1634_cast_fp16, y = cos_5)[name = string("op_1667")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_1634_cast_fp16)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_1634_cast_fp16)[name = string("x2_5")]; fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; tensor var_1688 = mul(x = x2_5, y = const_29_promoted)[name = string("op_1688")]; int32 var_1690 = const()[name = string("op_1690"), val = int32(-1)]; bool var_1691_interleave_0 = const()[name = string("op_1691_interleave_0"), val = bool(false)]; tensor var_1691 = concat(axis = var_1690, interleave = var_1691_interleave_0, values = (var_1688, x1_5))[name = string("op_1691")]; tensor var_1692 = mul(x = var_1691, y = sin_5)[name = string("op_1692")]; tensor query_states_11 = add(x = var_1667, y = var_1692)[name = string("query_states_11")]; tensor var_1695 = mul(x = var_1652_cast_fp16, y = cos_5)[name = string("op_1695")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_1652_cast_fp16)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_1652_cast_fp16)[name = string("x2_7")]; fp16 const_32_promoted = const()[name = string("const_32_promoted"), val = fp16(-0x1p+0)]; tensor var_1716 = mul(x = x2_7, y = const_32_promoted)[name = string("op_1716")]; int32 var_1718 = const()[name = string("op_1718"), val = int32(-1)]; bool var_1719_interleave_0 = const()[name = string("op_1719_interleave_0"), val = bool(false)]; tensor var_1719 = concat(axis = var_1718, interleave = var_1719_interleave_0, values = (var_1716, x1_7))[name = string("op_1719")]; tensor var_1720 = mul(x = var_1719, y = sin_5)[name = string("op_1720")]; tensor key_states_13 = add(x = var_1695, y = var_1720)[name = string("key_states_13")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_1230, concat_21_values3_0))[name = string("concat_21")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = key_states_13, x = coreml_update_state_37)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_38")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([37])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([38])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_1230, concat_25_values3_0))[name = string("concat_25")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11 = transpose(perm = var_1617, x = var_1612)[name = string("transpose_149")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_11, x = coreml_update_state_38)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_39")]; tensor var_1791_begin_0 = const()[name = string("op_1791_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_1791_end_0 = const()[name = string("op_1791_end_0"), val = tensor([2, 8, 1024, 128])]; tensor var_1791_end_mask_0 = const()[name = string("op_1791_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1791_cast_fp16 = slice_by_index(begin = var_1791_begin_0, end = var_1791_end_0, end_mask = var_1791_end_mask_0, x = coreml_update_state_39)[name = string("op_1791_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_1791_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_1798_begin_0 = const()[name = string("op_1798_begin_0"), val = tensor([37, 0, 0, 0])]; tensor var_1798_end_0 = const()[name = string("op_1798_end_0"), val = tensor([38, 8, 1024, 128])]; tensor var_1798_end_mask_0 = const()[name = string("op_1798_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1798_cast_fp16 = slice_by_index(begin = var_1798_begin_0, end = var_1798_end_0, end_mask = var_1798_end_mask_0, x = coreml_update_state_39)[name = string("op_1798_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_1798_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_27_axes_0 = const()[name = string("x_27_axes_0"), val = tensor([1])]; tensor x_27_cast_fp16 = expand_dims(axes = x_27_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_1827 = const()[name = string("op_1827"), val = tensor([1, 4, 1, 1])]; tensor x_29_cast_fp16 = tile(reps = var_1827, x = x_27_cast_fp16)[name = string("x_29_cast_fp16")]; tensor var_1839 = const()[name = string("op_1839"), val = tensor([1, -1, 1024, 128])]; tensor key_states_17_cast_fp16 = reshape(shape = var_1839, x = x_29_cast_fp16)[name = string("key_states_17_cast_fp16")]; tensor x_33_axes_0 = const()[name = string("x_33_axes_0"), val = tensor([1])]; tensor x_33_cast_fp16 = expand_dims(axes = x_33_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_1847 = const()[name = string("op_1847"), val = tensor([1, 4, 1, 1])]; tensor x_35_cast_fp16 = tile(reps = var_1847, x = x_33_cast_fp16)[name = string("x_35_cast_fp16")]; bool var_1874_transpose_x_0 = const()[name = string("op_1874_transpose_x_0"), val = bool(false)]; bool var_1874_transpose_y_0 = const()[name = string("op_1874_transpose_y_0"), val = bool(true)]; tensor var_1874 = matmul(transpose_x = var_1874_transpose_x_0, transpose_y = var_1874_transpose_y_0, x = query_states_11, y = key_states_17_cast_fp16)[name = string("op_1874")]; fp16 var_1875_to_fp16 = const()[name = string("op_1875_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_5_cast_fp16 = mul(x = var_1874, y = var_1875_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("attn_weights_7_cast_fp16")]; int32 var_1910 = const()[name = string("op_1910"), val = int32(-1)]; tensor var_1912_cast_fp16 = softmax(axis = var_1910, x = attn_weights_7_cast_fp16)[name = string("op_1912_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([32, 64, 1024])]; tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_1912_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([32, 1024, 128])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_35_cast_fp16)[name = string("reshape_4_cast_fp16")]; bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 32, 64, 128])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor var_1924_perm_0 = const()[name = string("op_1924_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1943 = const()[name = string("op_1943"), val = tensor([1, 64, 4096])]; tensor var_1924_cast_fp16 = transpose(perm = var_1924_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_148")]; tensor attn_output_15_cast_fp16 = reshape(shape = var_1943, x = var_1924_cast_fp16)[name = string("attn_output_15_cast_fp16")]; tensor var_1948 = const()[name = string("op_1948"), val = tensor([0, 2, 1])]; string var_1964_pad_type_0 = const()[name = string("op_1964_pad_type_0"), val = string("valid")]; int32 var_1964_groups_0 = const()[name = string("op_1964_groups_0"), val = int32(1)]; tensor var_1964_strides_0 = const()[name = string("op_1964_strides_0"), val = tensor([1])]; tensor var_1964_pad_0 = const()[name = string("op_1964_pad_0"), val = tensor([0, 0])]; tensor var_1964_dilations_0 = const()[name = string("op_1964_dilations_0"), val = tensor([1])]; tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854823168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860066112))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1949_cast_fp16 = transpose(perm = var_1948, x = attn_output_15_cast_fp16)[name = string("transpose_147")]; tensor var_1964_cast_fp16 = conv(dilations = var_1964_dilations_0, groups = var_1964_groups_0, pad = var_1964_pad_0, pad_type = var_1964_pad_type_0, strides = var_1964_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1949_cast_fp16)[name = string("op_1964_cast_fp16")]; tensor var_1968 = const()[name = string("op_1968"), val = tensor([0, 2, 1])]; tensor attn_output_19_cast_fp16 = transpose(perm = var_1968, x = var_1964_cast_fp16)[name = string("transpose_146")]; tensor hidden_states_11_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_11_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_11_cast_fp16, y = mean_15_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_1987_axes_0 = const()[name = string("op_1987_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860076416)))]; fp16 var_1975_to_fp16 = const()[name = string("op_1975_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1987_cast_fp16 = layer_norm(axes = var_1987_axes_0, epsilon = var_1975_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_1987_cast_fp16")]; tensor var_2001 = const()[name = string("op_2001"), val = tensor([0, 2, 1])]; tensor input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor([2])]; tensor var_2002 = transpose(perm = var_2001, x = var_1987_cast_fp16)[name = string("transpose_145")]; tensor input_31 = expand_dims(axes = input_31_axes_0, x = var_2002)[name = string("input_31")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor input_33 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_31)[name = string("input_33")]; string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_31)[name = string("b_3")]; tensor c_3 = silu(x = input_33)[name = string("c_3")]; tensor input_35 = mul(x = c_3, y = b_3)[name = string("input_35")]; string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; tensor e_3 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_35)[name = string("e_3")]; tensor var_2024_axes_0 = const()[name = string("op_2024_axes_0"), val = tensor([2])]; tensor var_2024 = squeeze(axes = var_2024_axes_0, x = e_3)[name = string("op_2024")]; tensor var_2025 = const()[name = string("op_2025"), val = tensor([0, 2, 1])]; tensor var_2026 = transpose(perm = var_2025, x = var_2024)[name = string("transpose_144")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = var_2026)[name = string("hidden_states_13_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_37_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_17_cast_fp16)[name = string("input_37_cast_fp16")]; tensor var_2044_axes_0 = const()[name = string("op_2044_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860081600)))]; fp16 var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2044_cast_fp16 = layer_norm(axes = var_2044_axes_0, epsilon = var_2032_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_37_cast_fp16)[name = string("op_2044_cast_fp16")]; tensor var_2056 = const()[name = string("op_2056"), val = tensor([0, 2, 1])]; tensor var_2059_axes_0 = const()[name = string("op_2059_axes_0"), val = tensor([2])]; tensor var_2057 = transpose(perm = var_2056, x = var_2044_cast_fp16)[name = string("transpose_143")]; tensor var_2059 = expand_dims(axes = var_2059_axes_0, x = var_2057)[name = string("op_2059")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2059)[name = string("query_states_17")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor key_states_21 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2059)[name = string("key_states_21")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor value_states_17 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2059)[name = string("value_states_17")]; tensor var_2101 = const()[name = string("op_2101"), val = tensor([1, 32, 128, 64])]; tensor var_2102 = reshape(shape = var_2101, x = query_states_17)[name = string("op_2102")]; tensor var_2107 = const()[name = string("op_2107"), val = tensor([0, 1, 3, 2])]; tensor var_2112 = const()[name = string("op_2112"), val = tensor([1, 8, 128, 64])]; tensor var_2113 = reshape(shape = var_2112, x = key_states_21)[name = string("op_2113")]; tensor var_2118 = const()[name = string("op_2118"), val = tensor([0, 1, 3, 2])]; tensor var_2123 = const()[name = string("op_2123"), val = tensor([1, 8, 128, 64])]; tensor var_2124 = reshape(shape = var_2123, x = value_states_17)[name = string("op_2124")]; tensor var_2129 = const()[name = string("op_2129"), val = tensor([0, 1, 3, 2])]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor x_41 = transpose(perm = var_2107, x = var_2102)[name = string("transpose_142")]; tensor mean_19 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = x_41)[name = string("mean_19")]; tensor input_41 = sub(x = x_41, y = mean_19)[name = string("input_41")]; tensor var_2146_axes_0 = const()[name = string("op_2146_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860086784)))]; fp16 var_2134_to_fp16 = const()[name = string("op_2134_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2146_cast_fp16 = layer_norm(axes = var_2146_axes_0, epsilon = var_2134_to_fp16, gamma = model_model_layers_2_self_attn_q_norm_weight_to_fp16, x = input_41)[name = string("op_2146_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor x_43 = transpose(perm = var_2118, x = var_2113)[name = string("transpose_141")]; tensor mean_21 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = x_43)[name = string("mean_21")]; tensor input_43 = sub(x = x_43, y = mean_21)[name = string("input_43")]; tensor var_2164_axes_0 = const()[name = string("op_2164_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860087104)))]; fp16 var_2152_to_fp16 = const()[name = string("op_2152_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2164_cast_fp16 = layer_norm(axes = var_2164_axes_0, epsilon = var_2152_to_fp16, gamma = model_model_layers_2_self_attn_k_norm_weight_to_fp16, x = input_43)[name = string("op_2164_cast_fp16")]; tensor var_2179 = mul(x = var_2146_cast_fp16, y = cos_5)[name = string("op_2179")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_2146_cast_fp16)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_2146_cast_fp16)[name = string("x2_9")]; fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; tensor var_2200 = mul(x = x2_9, y = const_51_promoted)[name = string("op_2200")]; int32 var_2202 = const()[name = string("op_2202"), val = int32(-1)]; bool var_2203_interleave_0 = const()[name = string("op_2203_interleave_0"), val = bool(false)]; tensor var_2203 = concat(axis = var_2202, interleave = var_2203_interleave_0, values = (var_2200, x1_9))[name = string("op_2203")]; tensor var_2204 = mul(x = var_2203, y = sin_5)[name = string("op_2204")]; tensor query_states_19 = add(x = var_2179, y = var_2204)[name = string("query_states_19")]; tensor var_2207 = mul(x = var_2164_cast_fp16, y = cos_5)[name = string("op_2207")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_2164_cast_fp16)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_2164_cast_fp16)[name = string("x2_11")]; fp16 const_54_promoted = const()[name = string("const_54_promoted"), val = fp16(-0x1p+0)]; tensor var_2228 = mul(x = x2_11, y = const_54_promoted)[name = string("op_2228")]; int32 var_2230 = const()[name = string("op_2230"), val = int32(-1)]; bool var_2231_interleave_0 = const()[name = string("op_2231_interleave_0"), val = bool(false)]; tensor var_2231 = concat(axis = var_2230, interleave = var_2231_interleave_0, values = (var_2228, x1_11))[name = string("op_2231")]; tensor var_2232 = mul(x = var_2231, y = sin_5)[name = string("op_2232")]; tensor key_states_23 = add(x = var_2207, y = var_2232)[name = string("key_states_23")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_1230, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = key_states_23, x = coreml_update_state_39)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_40")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([38])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([39])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_1230, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19 = transpose(perm = var_2129, x = var_2124)[name = string("transpose_140")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_19, x = coreml_update_state_40)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_41")]; tensor var_2303_begin_0 = const()[name = string("op_2303_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_2303_end_0 = const()[name = string("op_2303_end_0"), val = tensor([3, 8, 1024, 128])]; tensor var_2303_end_mask_0 = const()[name = string("op_2303_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2303_cast_fp16 = slice_by_index(begin = var_2303_begin_0, end = var_2303_end_0, end_mask = var_2303_end_mask_0, x = coreml_update_state_41)[name = string("op_2303_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_2303_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_2310_begin_0 = const()[name = string("op_2310_begin_0"), val = tensor([38, 0, 0, 0])]; tensor var_2310_end_0 = const()[name = string("op_2310_end_0"), val = tensor([39, 8, 1024, 128])]; tensor var_2310_end_mask_0 = const()[name = string("op_2310_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = coreml_update_state_41)[name = string("op_2310_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_2310_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_47_axes_0 = const()[name = string("x_47_axes_0"), val = tensor([1])]; tensor x_47_cast_fp16 = expand_dims(axes = x_47_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_2339 = const()[name = string("op_2339"), val = tensor([1, 4, 1, 1])]; tensor x_49_cast_fp16 = tile(reps = var_2339, x = x_47_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_2351 = const()[name = string("op_2351"), val = tensor([1, -1, 1024, 128])]; tensor key_states_27_cast_fp16 = reshape(shape = var_2351, x = x_49_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_53_axes_0 = const()[name = string("x_53_axes_0"), val = tensor([1])]; tensor x_53_cast_fp16 = expand_dims(axes = x_53_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_53_cast_fp16")]; tensor var_2359 = const()[name = string("op_2359"), val = tensor([1, 4, 1, 1])]; tensor x_55_cast_fp16 = tile(reps = var_2359, x = x_53_cast_fp16)[name = string("x_55_cast_fp16")]; bool var_2386_transpose_x_0 = const()[name = string("op_2386_transpose_x_0"), val = bool(false)]; bool var_2386_transpose_y_0 = const()[name = string("op_2386_transpose_y_0"), val = bool(true)]; tensor var_2386 = matmul(transpose_x = var_2386_transpose_x_0, transpose_y = var_2386_transpose_y_0, x = query_states_19, y = key_states_27_cast_fp16)[name = string("op_2386")]; fp16 var_2387_to_fp16 = const()[name = string("op_2387_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_9_cast_fp16 = mul(x = var_2386, y = var_2387_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor attn_weights_11_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("attn_weights_11_cast_fp16")]; int32 var_2422 = const()[name = string("op_2422"), val = int32(-1)]; tensor var_2424_cast_fp16 = softmax(axis = var_2422, x = attn_weights_11_cast_fp16)[name = string("op_2424_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([32, 64, 1024])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_2424_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([32, 1024, 128])]; tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_55_cast_fp16)[name = string("reshape_7_cast_fp16")]; bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 32, 64, 128])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor var_2436_perm_0 = const()[name = string("op_2436_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2455 = const()[name = string("op_2455"), val = tensor([1, 64, 4096])]; tensor var_2436_cast_fp16 = transpose(perm = var_2436_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_139")]; tensor attn_output_25_cast_fp16 = reshape(shape = var_2455, x = var_2436_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_2460 = const()[name = string("op_2460"), val = tensor([0, 2, 1])]; string var_2476_pad_type_0 = const()[name = string("op_2476_pad_type_0"), val = string("valid")]; int32 var_2476_groups_0 = const()[name = string("op_2476_groups_0"), val = int32(1)]; tensor var_2476_strides_0 = const()[name = string("op_2476_strides_0"), val = tensor([1])]; tensor var_2476_pad_0 = const()[name = string("op_2476_pad_0"), val = tensor([0, 0])]; tensor var_2476_dilations_0 = const()[name = string("op_2476_dilations_0"), val = tensor([1])]; tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860087424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865330368))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2461_cast_fp16 = transpose(perm = var_2460, x = attn_output_25_cast_fp16)[name = string("transpose_138")]; tensor var_2476_cast_fp16 = conv(dilations = var_2476_dilations_0, groups = var_2476_groups_0, pad = var_2476_pad_0, pad_type = var_2476_pad_type_0, strides = var_2476_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2461_cast_fp16)[name = string("op_2476_cast_fp16")]; tensor var_2480 = const()[name = string("op_2480"), val = tensor([0, 2, 1])]; tensor attn_output_29_cast_fp16 = transpose(perm = var_2480, x = var_2476_cast_fp16)[name = string("transpose_137")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_47_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_23_cast_fp16)[name = string("input_47_cast_fp16")]; tensor var_2499_axes_0 = const()[name = string("op_2499_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865340672)))]; fp16 var_2487_to_fp16 = const()[name = string("op_2487_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2499_cast_fp16 = layer_norm(axes = var_2499_axes_0, epsilon = var_2487_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("op_2499_cast_fp16")]; tensor var_2513 = const()[name = string("op_2513"), val = tensor([0, 2, 1])]; tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; tensor var_2514 = transpose(perm = var_2513, x = var_2499_cast_fp16)[name = string("transpose_136")]; tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_2514)[name = string("input_49")]; string input_51_pad_type_0 = const()[name = string("input_51_pad_type_0"), val = string("valid")]; tensor input_51_strides_0 = const()[name = string("input_51_strides_0"), val = tensor([1, 1])]; tensor input_51_pad_0 = const()[name = string("input_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_51_dilations_0 = const()[name = string("input_51_dilations_0"), val = tensor([1, 1])]; int32 input_51_groups_0 = const()[name = string("input_51_groups_0"), val = int32(1)]; tensor input_51 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_49)[name = string("input_51")]; string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_49)[name = string("b_5")]; tensor c_5 = silu(x = input_51)[name = string("c_5")]; tensor input_53 = mul(x = c_5, y = b_5)[name = string("input_53")]; string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; tensor e_5 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_53)[name = string("e_5")]; tensor var_2536_axes_0 = const()[name = string("op_2536_axes_0"), val = tensor([2])]; tensor var_2536 = squeeze(axes = var_2536_axes_0, x = e_5)[name = string("op_2536")]; tensor var_2537 = const()[name = string("op_2537"), val = tensor([0, 2, 1])]; tensor var_2538 = transpose(perm = var_2537, x = var_2536)[name = string("transpose_135")]; tensor hidden_states_19_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = var_2538)[name = string("hidden_states_19_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_19_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_55_cast_fp16 = sub(x = hidden_states_19_cast_fp16, y = mean_25_cast_fp16)[name = string("input_55_cast_fp16")]; tensor var_2556_axes_0 = const()[name = string("op_2556_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865345856)))]; fp16 var_2544_to_fp16 = const()[name = string("op_2544_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2556_cast_fp16 = layer_norm(axes = var_2556_axes_0, epsilon = var_2544_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_55_cast_fp16)[name = string("op_2556_cast_fp16")]; tensor var_2568 = const()[name = string("op_2568"), val = tensor([0, 2, 1])]; tensor var_2571_axes_0 = const()[name = string("op_2571_axes_0"), val = tensor([2])]; tensor var_2569 = transpose(perm = var_2568, x = var_2556_cast_fp16)[name = string("transpose_134")]; tensor var_2571 = expand_dims(axes = var_2571_axes_0, x = var_2569)[name = string("op_2571")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_2571)[name = string("query_states_25")]; string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_2571)[name = string("key_states_31")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_2571)[name = string("value_states_25")]; tensor var_2613 = const()[name = string("op_2613"), val = tensor([1, 32, 128, 64])]; tensor var_2614 = reshape(shape = var_2613, x = query_states_25)[name = string("op_2614")]; tensor var_2619 = const()[name = string("op_2619"), val = tensor([0, 1, 3, 2])]; tensor var_2624 = const()[name = string("op_2624"), val = tensor([1, 8, 128, 64])]; tensor var_2625 = reshape(shape = var_2624, x = key_states_31)[name = string("op_2625")]; tensor var_2630 = const()[name = string("op_2630"), val = tensor([0, 1, 3, 2])]; tensor var_2635 = const()[name = string("op_2635"), val = tensor([1, 8, 128, 64])]; tensor var_2636 = reshape(shape = var_2635, x = value_states_25)[name = string("op_2636")]; tensor var_2641 = const()[name = string("op_2641"), val = tensor([0, 1, 3, 2])]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor x_61 = transpose(perm = var_2619, x = var_2614)[name = string("transpose_133")]; tensor mean_27 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = x_61)[name = string("mean_27")]; tensor input_59 = sub(x = x_61, y = mean_27)[name = string("input_59")]; tensor var_2658_axes_0 = const()[name = string("op_2658_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351040)))]; fp16 var_2646_to_fp16 = const()[name = string("op_2646_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2658_cast_fp16 = layer_norm(axes = var_2658_axes_0, epsilon = var_2646_to_fp16, gamma = model_model_layers_3_self_attn_q_norm_weight_to_fp16, x = input_59)[name = string("op_2658_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor x_63 = transpose(perm = var_2630, x = var_2625)[name = string("transpose_132")]; tensor mean_29 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = x_63)[name = string("mean_29")]; tensor input_61 = sub(x = x_63, y = mean_29)[name = string("input_61")]; tensor var_2676_axes_0 = const()[name = string("op_2676_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351360)))]; fp16 var_2664_to_fp16 = const()[name = string("op_2664_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2676_cast_fp16 = layer_norm(axes = var_2676_axes_0, epsilon = var_2664_to_fp16, gamma = model_model_layers_3_self_attn_k_norm_weight_to_fp16, x = input_61)[name = string("op_2676_cast_fp16")]; tensor var_2691 = mul(x = var_2658_cast_fp16, y = cos_5)[name = string("op_2691")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_2658_cast_fp16)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_2658_cast_fp16)[name = string("x2_13")]; fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; tensor var_2712 = mul(x = x2_13, y = const_73_promoted)[name = string("op_2712")]; int32 var_2714 = const()[name = string("op_2714"), val = int32(-1)]; bool var_2715_interleave_0 = const()[name = string("op_2715_interleave_0"), val = bool(false)]; tensor var_2715 = concat(axis = var_2714, interleave = var_2715_interleave_0, values = (var_2712, x1_13))[name = string("op_2715")]; tensor var_2716 = mul(x = var_2715, y = sin_5)[name = string("op_2716")]; tensor query_states_27 = add(x = var_2691, y = var_2716)[name = string("query_states_27")]; tensor var_2719 = mul(x = var_2676_cast_fp16, y = cos_5)[name = string("op_2719")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_2676_cast_fp16)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_2676_cast_fp16)[name = string("x2_15")]; fp16 const_76_promoted = const()[name = string("const_76_promoted"), val = fp16(-0x1p+0)]; tensor var_2740 = mul(x = x2_15, y = const_76_promoted)[name = string("op_2740")]; int32 var_2742 = const()[name = string("op_2742"), val = int32(-1)]; bool var_2743_interleave_0 = const()[name = string("op_2743_interleave_0"), val = bool(false)]; tensor var_2743 = concat(axis = var_2742, interleave = var_2743_interleave_0, values = (var_2740, x1_15))[name = string("op_2743")]; tensor var_2744 = mul(x = var_2743, y = sin_5)[name = string("op_2744")]; tensor key_states_33 = add(x = var_2719, y = var_2744)[name = string("key_states_33")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_1230, concat_57_values3_0))[name = string("concat_57")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = key_states_33, x = coreml_update_state_41)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_42")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([39])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([40])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_1230, concat_61_values3_0))[name = string("concat_61")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27 = transpose(perm = var_2641, x = var_2636)[name = string("transpose_131")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_27, x = coreml_update_state_42)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_43")]; tensor var_2815_begin_0 = const()[name = string("op_2815_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_2815_end_0 = const()[name = string("op_2815_end_0"), val = tensor([4, 8, 1024, 128])]; tensor var_2815_end_mask_0 = const()[name = string("op_2815_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2815_cast_fp16 = slice_by_index(begin = var_2815_begin_0, end = var_2815_end_0, end_mask = var_2815_end_mask_0, x = coreml_update_state_43)[name = string("op_2815_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_2815_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_2822_begin_0 = const()[name = string("op_2822_begin_0"), val = tensor([39, 0, 0, 0])]; tensor var_2822_end_0 = const()[name = string("op_2822_end_0"), val = tensor([40, 8, 1024, 128])]; tensor var_2822_end_mask_0 = const()[name = string("op_2822_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2822_cast_fp16 = slice_by_index(begin = var_2822_begin_0, end = var_2822_end_0, end_mask = var_2822_end_mask_0, x = coreml_update_state_43)[name = string("op_2822_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_2822_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_2851 = const()[name = string("op_2851"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_2851, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_2863 = const()[name = string("op_2863"), val = tensor([1, -1, 1024, 128])]; tensor key_states_37_cast_fp16 = reshape(shape = var_2863, x = x_69_cast_fp16)[name = string("key_states_37_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_2871 = const()[name = string("op_2871"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_2871, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; bool var_2898_transpose_x_0 = const()[name = string("op_2898_transpose_x_0"), val = bool(false)]; bool var_2898_transpose_y_0 = const()[name = string("op_2898_transpose_y_0"), val = bool(true)]; tensor var_2898 = matmul(transpose_x = var_2898_transpose_x_0, transpose_y = var_2898_transpose_y_0, x = query_states_27, y = key_states_37_cast_fp16)[name = string("op_2898")]; fp16 var_2899_to_fp16 = const()[name = string("op_2899_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_2898, y = var_2899_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_2934 = const()[name = string("op_2934"), val = int32(-1)]; tensor var_2936_cast_fp16 = softmax(axis = var_2934, x = attn_weights_15_cast_fp16)[name = string("op_2936_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([32, 64, 1024])]; tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_2936_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([32, 1024, 128])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_75_cast_fp16)[name = string("reshape_10_cast_fp16")]; bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 32, 64, 128])]; tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor var_2948_perm_0 = const()[name = string("op_2948_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2967 = const()[name = string("op_2967"), val = tensor([1, 64, 4096])]; tensor var_2948_cast_fp16 = transpose(perm = var_2948_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_130")]; tensor attn_output_35_cast_fp16 = reshape(shape = var_2967, x = var_2948_cast_fp16)[name = string("attn_output_35_cast_fp16")]; tensor var_2972 = const()[name = string("op_2972"), val = tensor([0, 2, 1])]; string var_2988_pad_type_0 = const()[name = string("op_2988_pad_type_0"), val = string("valid")]; int32 var_2988_groups_0 = const()[name = string("op_2988_groups_0"), val = int32(1)]; tensor var_2988_strides_0 = const()[name = string("op_2988_strides_0"), val = tensor([1])]; tensor var_2988_pad_0 = const()[name = string("op_2988_pad_0"), val = tensor([0, 0])]; tensor var_2988_dilations_0 = const()[name = string("op_2988_dilations_0"), val = tensor([1])]; tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870594624))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2973_cast_fp16 = transpose(perm = var_2972, x = attn_output_35_cast_fp16)[name = string("transpose_129")]; tensor var_2988_cast_fp16 = conv(dilations = var_2988_dilations_0, groups = var_2988_groups_0, pad = var_2988_pad_0, pad_type = var_2988_pad_type_0, strides = var_2988_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_2973_cast_fp16)[name = string("op_2988_cast_fp16")]; tensor var_2992 = const()[name = string("op_2992"), val = tensor([0, 2, 1])]; tensor attn_output_39_cast_fp16 = transpose(perm = var_2992, x = var_2988_cast_fp16)[name = string("transpose_128")]; tensor hidden_states_23_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_23_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_65_cast_fp16 = sub(x = hidden_states_23_cast_fp16, y = mean_31_cast_fp16)[name = string("input_65_cast_fp16")]; tensor var_3011_axes_0 = const()[name = string("op_3011_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870604928)))]; fp16 var_2999_to_fp16 = const()[name = string("op_2999_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3011_cast_fp16 = layer_norm(axes = var_3011_axes_0, epsilon = var_2999_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_65_cast_fp16)[name = string("op_3011_cast_fp16")]; tensor var_3025 = const()[name = string("op_3025"), val = tensor([0, 2, 1])]; tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; tensor var_3026 = transpose(perm = var_3025, x = var_3011_cast_fp16)[name = string("transpose_127")]; tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_3026)[name = string("input_67")]; string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; tensor input_69 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_67)[name = string("input_69")]; string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_67)[name = string("b_7")]; tensor c_7 = silu(x = input_69)[name = string("c_7")]; tensor input_71 = mul(x = c_7, y = b_7)[name = string("input_71")]; string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; tensor e_7 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_71)[name = string("e_7")]; tensor var_3048_axes_0 = const()[name = string("op_3048_axes_0"), val = tensor([2])]; tensor var_3048 = squeeze(axes = var_3048_axes_0, x = e_7)[name = string("op_3048")]; tensor var_3049 = const()[name = string("op_3049"), val = tensor([0, 2, 1])]; tensor var_3050 = transpose(perm = var_3049, x = var_3048)[name = string("transpose_126")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_23_cast_fp16, y = var_3050)[name = string("hidden_states_25_cast_fp16")]; tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_33_cast_fp16")]; tensor input_73_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_33_cast_fp16)[name = string("input_73_cast_fp16")]; tensor var_3068_axes_0 = const()[name = string("op_3068_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870610112)))]; fp16 var_3056_to_fp16 = const()[name = string("op_3056_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3068_cast_fp16 = layer_norm(axes = var_3068_axes_0, epsilon = var_3056_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_73_cast_fp16)[name = string("op_3068_cast_fp16")]; tensor var_3080 = const()[name = string("op_3080"), val = tensor([0, 2, 1])]; tensor var_3083_axes_0 = const()[name = string("op_3083_axes_0"), val = tensor([2])]; tensor var_3081 = transpose(perm = var_3080, x = var_3068_cast_fp16)[name = string("transpose_125")]; tensor var_3083 = expand_dims(axes = var_3083_axes_0, x = var_3081)[name = string("op_3083")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_3083)[name = string("query_states_33")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor key_states_41 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_3083)[name = string("key_states_41")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor value_states_33 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_3083)[name = string("value_states_33")]; tensor var_3125 = const()[name = string("op_3125"), val = tensor([1, 32, 128, 64])]; tensor var_3126 = reshape(shape = var_3125, x = query_states_33)[name = string("op_3126")]; tensor var_3131 = const()[name = string("op_3131"), val = tensor([0, 1, 3, 2])]; tensor var_3136 = const()[name = string("op_3136"), val = tensor([1, 8, 128, 64])]; tensor var_3137 = reshape(shape = var_3136, x = key_states_41)[name = string("op_3137")]; tensor var_3142 = const()[name = string("op_3142"), val = tensor([0, 1, 3, 2])]; tensor var_3147 = const()[name = string("op_3147"), val = tensor([1, 8, 128, 64])]; tensor var_3148 = reshape(shape = var_3147, x = value_states_33)[name = string("op_3148")]; tensor var_3153 = const()[name = string("op_3153"), val = tensor([0, 1, 3, 2])]; tensor mean_35_axes_0 = const()[name = string("mean_35_axes_0"), val = tensor([-1])]; bool mean_35_keep_dims_0 = const()[name = string("mean_35_keep_dims_0"), val = bool(true)]; tensor x_81 = transpose(perm = var_3131, x = var_3126)[name = string("transpose_124")]; tensor mean_35 = reduce_mean(axes = mean_35_axes_0, keep_dims = mean_35_keep_dims_0, x = x_81)[name = string("mean_35")]; tensor input_77 = sub(x = x_81, y = mean_35)[name = string("input_77")]; tensor var_3170_axes_0 = const()[name = string("op_3170_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615296)))]; fp16 var_3158_to_fp16 = const()[name = string("op_3158_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3170_cast_fp16 = layer_norm(axes = var_3170_axes_0, epsilon = var_3158_to_fp16, gamma = model_model_layers_4_self_attn_q_norm_weight_to_fp16, x = input_77)[name = string("op_3170_cast_fp16")]; tensor mean_37_axes_0 = const()[name = string("mean_37_axes_0"), val = tensor([-1])]; bool mean_37_keep_dims_0 = const()[name = string("mean_37_keep_dims_0"), val = bool(true)]; tensor x_83 = transpose(perm = var_3142, x = var_3137)[name = string("transpose_123")]; tensor mean_37 = reduce_mean(axes = mean_37_axes_0, keep_dims = mean_37_keep_dims_0, x = x_83)[name = string("mean_37")]; tensor input_79 = sub(x = x_83, y = mean_37)[name = string("input_79")]; tensor var_3188_axes_0 = const()[name = string("op_3188_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615616)))]; fp16 var_3176_to_fp16 = const()[name = string("op_3176_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3188_cast_fp16 = layer_norm(axes = var_3188_axes_0, epsilon = var_3176_to_fp16, gamma = model_model_layers_4_self_attn_k_norm_weight_to_fp16, x = input_79)[name = string("op_3188_cast_fp16")]; tensor var_3203 = mul(x = var_3170_cast_fp16, y = cos_5)[name = string("op_3203")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_3170_cast_fp16)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_3170_cast_fp16)[name = string("x2_17")]; fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; tensor var_3224 = mul(x = x2_17, y = const_95_promoted)[name = string("op_3224")]; int32 var_3226 = const()[name = string("op_3226"), val = int32(-1)]; bool var_3227_interleave_0 = const()[name = string("op_3227_interleave_0"), val = bool(false)]; tensor var_3227 = concat(axis = var_3226, interleave = var_3227_interleave_0, values = (var_3224, x1_17))[name = string("op_3227")]; tensor var_3228 = mul(x = var_3227, y = sin_5)[name = string("op_3228")]; tensor query_states_35 = add(x = var_3203, y = var_3228)[name = string("query_states_35")]; tensor var_3231 = mul(x = var_3188_cast_fp16, y = cos_5)[name = string("op_3231")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_3188_cast_fp16)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_3188_cast_fp16)[name = string("x2_19")]; fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; tensor var_3252 = mul(x = x2_19, y = const_98_promoted)[name = string("op_3252")]; int32 var_3254 = const()[name = string("op_3254"), val = int32(-1)]; bool var_3255_interleave_0 = const()[name = string("op_3255_interleave_0"), val = bool(false)]; tensor var_3255 = concat(axis = var_3254, interleave = var_3255_interleave_0, values = (var_3252, x1_19))[name = string("op_3255")]; tensor var_3256 = mul(x = var_3255, y = sin_5)[name = string("op_3256")]; tensor key_states_43 = add(x = var_3231, y = var_3256)[name = string("key_states_43")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_1230, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = key_states_43, x = coreml_update_state_43)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_44")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([40])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([41])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_1230, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35 = transpose(perm = var_3153, x = var_3148)[name = string("transpose_122")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_35, x = coreml_update_state_44)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_45")]; tensor var_3327_begin_0 = const()[name = string("op_3327_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_3327_end_0 = const()[name = string("op_3327_end_0"), val = tensor([5, 8, 1024, 128])]; tensor var_3327_end_mask_0 = const()[name = string("op_3327_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3327_cast_fp16 = slice_by_index(begin = var_3327_begin_0, end = var_3327_end_0, end_mask = var_3327_end_mask_0, x = coreml_update_state_45)[name = string("op_3327_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_3327_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_3334_begin_0 = const()[name = string("op_3334_begin_0"), val = tensor([40, 0, 0, 0])]; tensor var_3334_end_0 = const()[name = string("op_3334_end_0"), val = tensor([41, 8, 1024, 128])]; tensor var_3334_end_mask_0 = const()[name = string("op_3334_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3334_cast_fp16 = slice_by_index(begin = var_3334_begin_0, end = var_3334_end_0, end_mask = var_3334_end_mask_0, x = coreml_update_state_45)[name = string("op_3334_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_3334_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_87_axes_0 = const()[name = string("x_87_axes_0"), val = tensor([1])]; tensor x_87_cast_fp16 = expand_dims(axes = x_87_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_3363 = const()[name = string("op_3363"), val = tensor([1, 4, 1, 1])]; tensor x_89_cast_fp16 = tile(reps = var_3363, x = x_87_cast_fp16)[name = string("x_89_cast_fp16")]; tensor var_3375 = const()[name = string("op_3375"), val = tensor([1, -1, 1024, 128])]; tensor key_states_47_cast_fp16 = reshape(shape = var_3375, x = x_89_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor x_93_axes_0 = const()[name = string("x_93_axes_0"), val = tensor([1])]; tensor x_93_cast_fp16 = expand_dims(axes = x_93_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_93_cast_fp16")]; tensor var_3383 = const()[name = string("op_3383"), val = tensor([1, 4, 1, 1])]; tensor x_95_cast_fp16 = tile(reps = var_3383, x = x_93_cast_fp16)[name = string("x_95_cast_fp16")]; bool var_3410_transpose_x_0 = const()[name = string("op_3410_transpose_x_0"), val = bool(false)]; bool var_3410_transpose_y_0 = const()[name = string("op_3410_transpose_y_0"), val = bool(true)]; tensor var_3410 = matmul(transpose_x = var_3410_transpose_x_0, transpose_y = var_3410_transpose_y_0, x = query_states_35, y = key_states_47_cast_fp16)[name = string("op_3410")]; fp16 var_3411_to_fp16 = const()[name = string("op_3411_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_17_cast_fp16 = mul(x = var_3410, y = var_3411_to_fp16)[name = string("attn_weights_17_cast_fp16")]; tensor attn_weights_19_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("attn_weights_19_cast_fp16")]; int32 var_3446 = const()[name = string("op_3446"), val = int32(-1)]; tensor var_3448_cast_fp16 = softmax(axis = var_3446, x = attn_weights_19_cast_fp16)[name = string("op_3448_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([32, 64, 1024])]; tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_3448_cast_fp16)[name = string("reshape_12_cast_fp16")]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([32, 1024, 128])]; tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_95_cast_fp16)[name = string("reshape_13_cast_fp16")]; bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 32, 64, 128])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor var_3460_perm_0 = const()[name = string("op_3460_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3479 = const()[name = string("op_3479"), val = tensor([1, 64, 4096])]; tensor var_3460_cast_fp16 = transpose(perm = var_3460_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_121")]; tensor attn_output_45_cast_fp16 = reshape(shape = var_3479, x = var_3460_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_3484 = const()[name = string("op_3484"), val = tensor([0, 2, 1])]; string var_3500_pad_type_0 = const()[name = string("op_3500_pad_type_0"), val = string("valid")]; int32 var_3500_groups_0 = const()[name = string("op_3500_groups_0"), val = int32(1)]; tensor var_3500_strides_0 = const()[name = string("op_3500_strides_0"), val = tensor([1])]; tensor var_3500_pad_0 = const()[name = string("op_3500_pad_0"), val = tensor([0, 0])]; tensor var_3500_dilations_0 = const()[name = string("op_3500_dilations_0"), val = tensor([1])]; tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875858880))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3485_cast_fp16 = transpose(perm = var_3484, x = attn_output_45_cast_fp16)[name = string("transpose_120")]; tensor var_3500_cast_fp16 = conv(dilations = var_3500_dilations_0, groups = var_3500_groups_0, pad = var_3500_pad_0, pad_type = var_3500_pad_type_0, strides = var_3500_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3485_cast_fp16)[name = string("op_3500_cast_fp16")]; tensor var_3504 = const()[name = string("op_3504"), val = tensor([0, 2, 1])]; tensor attn_output_49_cast_fp16 = transpose(perm = var_3504, x = var_3500_cast_fp16)[name = string("transpose_119")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_39_axes_0 = const()[name = string("mean_39_axes_0"), val = tensor([-1])]; bool mean_39_keep_dims_0 = const()[name = string("mean_39_keep_dims_0"), val = bool(true)]; tensor mean_39_cast_fp16 = reduce_mean(axes = mean_39_axes_0, keep_dims = mean_39_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_39_cast_fp16")]; tensor input_83_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_39_cast_fp16)[name = string("input_83_cast_fp16")]; tensor var_3523_axes_0 = const()[name = string("op_3523_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875869184)))]; fp16 var_3511_to_fp16 = const()[name = string("op_3511_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3523_cast_fp16 = layer_norm(axes = var_3523_axes_0, epsilon = var_3511_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("op_3523_cast_fp16")]; tensor var_3537 = const()[name = string("op_3537"), val = tensor([0, 2, 1])]; tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; tensor var_3538 = transpose(perm = var_3537, x = var_3523_cast_fp16)[name = string("transpose_118")]; tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_3538)[name = string("input_85")]; string input_87_pad_type_0 = const()[name = string("input_87_pad_type_0"), val = string("valid")]; tensor input_87_strides_0 = const()[name = string("input_87_strides_0"), val = tensor([1, 1])]; tensor input_87_pad_0 = const()[name = string("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_87_dilations_0 = const()[name = string("input_87_dilations_0"), val = tensor([1, 1])]; int32 input_87_groups_0 = const()[name = string("input_87_groups_0"), val = int32(1)]; tensor input_87 = conv(dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_85)[name = string("input_87")]; string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_85)[name = string("b_9")]; tensor c_9 = silu(x = input_87)[name = string("c_9")]; tensor input_89 = mul(x = c_9, y = b_9)[name = string("input_89")]; string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; tensor e_9 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_89)[name = string("e_9")]; tensor var_3560_axes_0 = const()[name = string("op_3560_axes_0"), val = tensor([2])]; tensor var_3560 = squeeze(axes = var_3560_axes_0, x = e_9)[name = string("op_3560")]; tensor var_3561 = const()[name = string("op_3561"), val = tensor([0, 2, 1])]; tensor var_3562 = transpose(perm = var_3561, x = var_3560)[name = string("transpose_117")]; tensor hidden_states_31_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_3562)[name = string("hidden_states_31_cast_fp16")]; tensor mean_41_axes_0 = const()[name = string("mean_41_axes_0"), val = tensor([-1])]; bool mean_41_keep_dims_0 = const()[name = string("mean_41_keep_dims_0"), val = bool(true)]; tensor mean_41_cast_fp16 = reduce_mean(axes = mean_41_axes_0, keep_dims = mean_41_keep_dims_0, x = hidden_states_31_cast_fp16)[name = string("mean_41_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_31_cast_fp16, y = mean_41_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_3580_axes_0 = const()[name = string("op_3580_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875874368)))]; fp16 var_3568_to_fp16 = const()[name = string("op_3568_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3580_cast_fp16 = layer_norm(axes = var_3580_axes_0, epsilon = var_3568_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_3580_cast_fp16")]; tensor var_3592 = const()[name = string("op_3592"), val = tensor([0, 2, 1])]; tensor var_3595_axes_0 = const()[name = string("op_3595_axes_0"), val = tensor([2])]; tensor var_3593 = transpose(perm = var_3592, x = var_3580_cast_fp16)[name = string("transpose_116")]; tensor var_3595 = expand_dims(axes = var_3595_axes_0, x = var_3593)[name = string("op_3595")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_3595)[name = string("query_states_41")]; string key_states_51_pad_type_0 = const()[name = string("key_states_51_pad_type_0"), val = string("valid")]; tensor key_states_51_strides_0 = const()[name = string("key_states_51_strides_0"), val = tensor([1, 1])]; tensor key_states_51_pad_0 = const()[name = string("key_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_51_dilations_0 = const()[name = string("key_states_51_dilations_0"), val = tensor([1, 1])]; int32 key_states_51_groups_0 = const()[name = string("key_states_51_groups_0"), val = int32(1)]; tensor key_states_51 = conv(dilations = key_states_51_dilations_0, groups = key_states_51_groups_0, pad = key_states_51_pad_0, pad_type = key_states_51_pad_type_0, strides = key_states_51_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_3595)[name = string("key_states_51")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor value_states_41 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_3595)[name = string("value_states_41")]; tensor var_3637 = const()[name = string("op_3637"), val = tensor([1, 32, 128, 64])]; tensor var_3638 = reshape(shape = var_3637, x = query_states_41)[name = string("op_3638")]; tensor var_3643 = const()[name = string("op_3643"), val = tensor([0, 1, 3, 2])]; tensor var_3648 = const()[name = string("op_3648"), val = tensor([1, 8, 128, 64])]; tensor var_3649 = reshape(shape = var_3648, x = key_states_51)[name = string("op_3649")]; tensor var_3654 = const()[name = string("op_3654"), val = tensor([0, 1, 3, 2])]; tensor var_3659 = const()[name = string("op_3659"), val = tensor([1, 8, 128, 64])]; tensor var_3660 = reshape(shape = var_3659, x = value_states_41)[name = string("op_3660")]; tensor var_3665 = const()[name = string("op_3665"), val = tensor([0, 1, 3, 2])]; tensor mean_43_axes_0 = const()[name = string("mean_43_axes_0"), val = tensor([-1])]; bool mean_43_keep_dims_0 = const()[name = string("mean_43_keep_dims_0"), val = bool(true)]; tensor x_101 = transpose(perm = var_3643, x = var_3638)[name = string("transpose_115")]; tensor mean_43 = reduce_mean(axes = mean_43_axes_0, keep_dims = mean_43_keep_dims_0, x = x_101)[name = string("mean_43")]; tensor input_95 = sub(x = x_101, y = mean_43)[name = string("input_95")]; tensor var_3682_axes_0 = const()[name = string("op_3682_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875879552)))]; fp16 var_3670_to_fp16 = const()[name = string("op_3670_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3682_cast_fp16 = layer_norm(axes = var_3682_axes_0, epsilon = var_3670_to_fp16, gamma = model_model_layers_5_self_attn_q_norm_weight_to_fp16, x = input_95)[name = string("op_3682_cast_fp16")]; tensor mean_45_axes_0 = const()[name = string("mean_45_axes_0"), val = tensor([-1])]; bool mean_45_keep_dims_0 = const()[name = string("mean_45_keep_dims_0"), val = bool(true)]; tensor x_103 = transpose(perm = var_3654, x = var_3649)[name = string("transpose_114")]; tensor mean_45 = reduce_mean(axes = mean_45_axes_0, keep_dims = mean_45_keep_dims_0, x = x_103)[name = string("mean_45")]; tensor input_97 = sub(x = x_103, y = mean_45)[name = string("input_97")]; tensor var_3700_axes_0 = const()[name = string("op_3700_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875879872)))]; fp16 var_3688_to_fp16 = const()[name = string("op_3688_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3700_cast_fp16 = layer_norm(axes = var_3700_axes_0, epsilon = var_3688_to_fp16, gamma = model_model_layers_5_self_attn_k_norm_weight_to_fp16, x = input_97)[name = string("op_3700_cast_fp16")]; tensor var_3715 = mul(x = var_3682_cast_fp16, y = cos_5)[name = string("op_3715")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_3682_cast_fp16)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_3682_cast_fp16)[name = string("x2_21")]; fp16 const_117_promoted = const()[name = string("const_117_promoted"), val = fp16(-0x1p+0)]; tensor var_3736 = mul(x = x2_21, y = const_117_promoted)[name = string("op_3736")]; int32 var_3738 = const()[name = string("op_3738"), val = int32(-1)]; bool var_3739_interleave_0 = const()[name = string("op_3739_interleave_0"), val = bool(false)]; tensor var_3739 = concat(axis = var_3738, interleave = var_3739_interleave_0, values = (var_3736, x1_21))[name = string("op_3739")]; tensor var_3740 = mul(x = var_3739, y = sin_5)[name = string("op_3740")]; tensor query_states_43 = add(x = var_3715, y = var_3740)[name = string("query_states_43")]; tensor var_3743 = mul(x = var_3700_cast_fp16, y = cos_5)[name = string("op_3743")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_3700_cast_fp16)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_3700_cast_fp16)[name = string("x2_23")]; fp16 const_120_promoted = const()[name = string("const_120_promoted"), val = fp16(-0x1p+0)]; tensor var_3764 = mul(x = x2_23, y = const_120_promoted)[name = string("op_3764")]; int32 var_3766 = const()[name = string("op_3766"), val = int32(-1)]; bool var_3767_interleave_0 = const()[name = string("op_3767_interleave_0"), val = bool(false)]; tensor var_3767 = concat(axis = var_3766, interleave = var_3767_interleave_0, values = (var_3764, x1_23))[name = string("op_3767")]; tensor var_3768 = mul(x = var_3767, y = sin_5)[name = string("op_3768")]; tensor key_states_53 = add(x = var_3743, y = var_3768)[name = string("key_states_53")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_1230, concat_93_values3_0))[name = string("concat_93")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = key_states_53, x = coreml_update_state_45)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_46")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([41])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([42])]; int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_1230, concat_97_values3_0))[name = string("concat_97")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43 = transpose(perm = var_3665, x = var_3660)[name = string("transpose_113")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_43, x = coreml_update_state_46)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_47 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_47")]; tensor var_3839_begin_0 = const()[name = string("op_3839_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_3839_end_0 = const()[name = string("op_3839_end_0"), val = tensor([6, 8, 1024, 128])]; tensor var_3839_end_mask_0 = const()[name = string("op_3839_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3839_cast_fp16 = slice_by_index(begin = var_3839_begin_0, end = var_3839_end_0, end_mask = var_3839_end_mask_0, x = coreml_update_state_47)[name = string("op_3839_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_3839_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_3846_begin_0 = const()[name = string("op_3846_begin_0"), val = tensor([41, 0, 0, 0])]; tensor var_3846_end_0 = const()[name = string("op_3846_end_0"), val = tensor([42, 8, 1024, 128])]; tensor var_3846_end_mask_0 = const()[name = string("op_3846_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3846_cast_fp16 = slice_by_index(begin = var_3846_begin_0, end = var_3846_end_0, end_mask = var_3846_end_mask_0, x = coreml_update_state_47)[name = string("op_3846_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_3846_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_107_axes_0 = const()[name = string("x_107_axes_0"), val = tensor([1])]; tensor x_107_cast_fp16 = expand_dims(axes = x_107_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_107_cast_fp16")]; tensor var_3875 = const()[name = string("op_3875"), val = tensor([1, 4, 1, 1])]; tensor x_109_cast_fp16 = tile(reps = var_3875, x = x_107_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_3887 = const()[name = string("op_3887"), val = tensor([1, -1, 1024, 128])]; tensor key_states_57_cast_fp16 = reshape(shape = var_3887, x = x_109_cast_fp16)[name = string("key_states_57_cast_fp16")]; tensor x_113_axes_0 = const()[name = string("x_113_axes_0"), val = tensor([1])]; tensor x_113_cast_fp16 = expand_dims(axes = x_113_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_113_cast_fp16")]; tensor var_3895 = const()[name = string("op_3895"), val = tensor([1, 4, 1, 1])]; tensor x_115_cast_fp16 = tile(reps = var_3895, x = x_113_cast_fp16)[name = string("x_115_cast_fp16")]; bool var_3922_transpose_x_0 = const()[name = string("op_3922_transpose_x_0"), val = bool(false)]; bool var_3922_transpose_y_0 = const()[name = string("op_3922_transpose_y_0"), val = bool(true)]; tensor var_3922 = matmul(transpose_x = var_3922_transpose_x_0, transpose_y = var_3922_transpose_y_0, x = query_states_43, y = key_states_57_cast_fp16)[name = string("op_3922")]; fp16 var_3923_to_fp16 = const()[name = string("op_3923_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_21_cast_fp16 = mul(x = var_3922, y = var_3923_to_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor attn_weights_23_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("attn_weights_23_cast_fp16")]; int32 var_3958 = const()[name = string("op_3958"), val = int32(-1)]; tensor var_3960_cast_fp16 = softmax(axis = var_3958, x = attn_weights_23_cast_fp16)[name = string("op_3960_cast_fp16")]; tensor concat_102 = const()[name = string("concat_102"), val = tensor([32, 64, 1024])]; tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_3960_cast_fp16)[name = string("reshape_15_cast_fp16")]; tensor concat_103 = const()[name = string("concat_103"), val = tensor([32, 1024, 128])]; tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_115_cast_fp16)[name = string("reshape_16_cast_fp16")]; bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 32, 64, 128])]; tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor var_3972_perm_0 = const()[name = string("op_3972_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3991 = const()[name = string("op_3991"), val = tensor([1, 64, 4096])]; tensor var_3972_cast_fp16 = transpose(perm = var_3972_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_112")]; tensor attn_output_55_cast_fp16 = reshape(shape = var_3991, x = var_3972_cast_fp16)[name = string("attn_output_55_cast_fp16")]; tensor var_3996 = const()[name = string("op_3996"), val = tensor([0, 2, 1])]; string var_4012_pad_type_0 = const()[name = string("op_4012_pad_type_0"), val = string("valid")]; int32 var_4012_groups_0 = const()[name = string("op_4012_groups_0"), val = int32(1)]; tensor var_4012_strides_0 = const()[name = string("op_4012_strides_0"), val = tensor([1])]; tensor var_4012_pad_0 = const()[name = string("op_4012_pad_0"), val = tensor([0, 0])]; tensor var_4012_dilations_0 = const()[name = string("op_4012_dilations_0"), val = tensor([1])]; tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875880192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881123136))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3997_cast_fp16 = transpose(perm = var_3996, x = attn_output_55_cast_fp16)[name = string("transpose_111")]; tensor var_4012_cast_fp16 = conv(dilations = var_4012_dilations_0, groups = var_4012_groups_0, pad = var_4012_pad_0, pad_type = var_4012_pad_type_0, strides = var_4012_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_3997_cast_fp16)[name = string("op_4012_cast_fp16")]; tensor var_4016 = const()[name = string("op_4016"), val = tensor([0, 2, 1])]; tensor attn_output_59_cast_fp16 = transpose(perm = var_4016, x = var_4012_cast_fp16)[name = string("transpose_110")]; tensor hidden_states_35_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor mean_47_axes_0 = const()[name = string("mean_47_axes_0"), val = tensor([-1])]; bool mean_47_keep_dims_0 = const()[name = string("mean_47_keep_dims_0"), val = bool(true)]; tensor mean_47_cast_fp16 = reduce_mean(axes = mean_47_axes_0, keep_dims = mean_47_keep_dims_0, x = hidden_states_35_cast_fp16)[name = string("mean_47_cast_fp16")]; tensor input_101_cast_fp16 = sub(x = hidden_states_35_cast_fp16, y = mean_47_cast_fp16)[name = string("input_101_cast_fp16")]; tensor var_4035_axes_0 = const()[name = string("op_4035_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881133440)))]; fp16 var_4023_to_fp16 = const()[name = string("op_4023_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4035_cast_fp16 = layer_norm(axes = var_4035_axes_0, epsilon = var_4023_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_101_cast_fp16)[name = string("op_4035_cast_fp16")]; tensor var_4049 = const()[name = string("op_4049"), val = tensor([0, 2, 1])]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; tensor var_4050 = transpose(perm = var_4049, x = var_4035_cast_fp16)[name = string("transpose_109")]; tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_4050)[name = string("input_103")]; string input_105_pad_type_0 = const()[name = string("input_105_pad_type_0"), val = string("valid")]; tensor input_105_strides_0 = const()[name = string("input_105_strides_0"), val = tensor([1, 1])]; tensor input_105_pad_0 = const()[name = string("input_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_105_dilations_0 = const()[name = string("input_105_dilations_0"), val = tensor([1, 1])]; int32 input_105_groups_0 = const()[name = string("input_105_groups_0"), val = int32(1)]; tensor input_105 = conv(dilations = input_105_dilations_0, groups = input_105_groups_0, pad = input_105_pad_0, pad_type = input_105_pad_type_0, strides = input_105_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_103)[name = string("input_105")]; string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_103)[name = string("b_11")]; tensor c_11 = silu(x = input_105)[name = string("c_11")]; tensor input_107 = mul(x = c_11, y = b_11)[name = string("input_107")]; string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; tensor e_11 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_107)[name = string("e_11")]; tensor var_4072_axes_0 = const()[name = string("op_4072_axes_0"), val = tensor([2])]; tensor var_4072 = squeeze(axes = var_4072_axes_0, x = e_11)[name = string("op_4072")]; tensor var_4073 = const()[name = string("op_4073"), val = tensor([0, 2, 1])]; tensor var_4074 = transpose(perm = var_4073, x = var_4072)[name = string("transpose_108")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = var_4074)[name = string("hidden_states_37_cast_fp16")]; tensor mean_49_axes_0 = const()[name = string("mean_49_axes_0"), val = tensor([-1])]; bool mean_49_keep_dims_0 = const()[name = string("mean_49_keep_dims_0"), val = bool(true)]; tensor mean_49_cast_fp16 = reduce_mean(axes = mean_49_axes_0, keep_dims = mean_49_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_49_cast_fp16")]; tensor input_109_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_49_cast_fp16)[name = string("input_109_cast_fp16")]; tensor var_4092_axes_0 = const()[name = string("op_4092_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881138624)))]; fp16 var_4080_to_fp16 = const()[name = string("op_4080_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4092_cast_fp16 = layer_norm(axes = var_4092_axes_0, epsilon = var_4080_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_109_cast_fp16)[name = string("op_4092_cast_fp16")]; tensor var_4104 = const()[name = string("op_4104"), val = tensor([0, 2, 1])]; tensor var_4107_axes_0 = const()[name = string("op_4107_axes_0"), val = tensor([2])]; tensor var_4105 = transpose(perm = var_4104, x = var_4092_cast_fp16)[name = string("transpose_107")]; tensor var_4107 = expand_dims(axes = var_4107_axes_0, x = var_4105)[name = string("op_4107")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_4107)[name = string("query_states_49")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_4107)[name = string("key_states_61")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_4107)[name = string("value_states_49")]; tensor var_4149 = const()[name = string("op_4149"), val = tensor([1, 32, 128, 64])]; tensor var_4150 = reshape(shape = var_4149, x = query_states_49)[name = string("op_4150")]; tensor var_4155 = const()[name = string("op_4155"), val = tensor([0, 1, 3, 2])]; tensor var_4160 = const()[name = string("op_4160"), val = tensor([1, 8, 128, 64])]; tensor var_4161 = reshape(shape = var_4160, x = key_states_61)[name = string("op_4161")]; tensor var_4166 = const()[name = string("op_4166"), val = tensor([0, 1, 3, 2])]; tensor var_4171 = const()[name = string("op_4171"), val = tensor([1, 8, 128, 64])]; tensor var_4172 = reshape(shape = var_4171, x = value_states_49)[name = string("op_4172")]; tensor var_4177 = const()[name = string("op_4177"), val = tensor([0, 1, 3, 2])]; tensor mean_51_axes_0 = const()[name = string("mean_51_axes_0"), val = tensor([-1])]; bool mean_51_keep_dims_0 = const()[name = string("mean_51_keep_dims_0"), val = bool(true)]; tensor x_121 = transpose(perm = var_4155, x = var_4150)[name = string("transpose_106")]; tensor mean_51 = reduce_mean(axes = mean_51_axes_0, keep_dims = mean_51_keep_dims_0, x = x_121)[name = string("mean_51")]; tensor input_113 = sub(x = x_121, y = mean_51)[name = string("input_113")]; tensor var_4194_axes_0 = const()[name = string("op_4194_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881143808)))]; fp16 var_4182_to_fp16 = const()[name = string("op_4182_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4194_cast_fp16 = layer_norm(axes = var_4194_axes_0, epsilon = var_4182_to_fp16, gamma = model_model_layers_6_self_attn_q_norm_weight_to_fp16, x = input_113)[name = string("op_4194_cast_fp16")]; tensor mean_53_axes_0 = const()[name = string("mean_53_axes_0"), val = tensor([-1])]; bool mean_53_keep_dims_0 = const()[name = string("mean_53_keep_dims_0"), val = bool(true)]; tensor x_123 = transpose(perm = var_4166, x = var_4161)[name = string("transpose_105")]; tensor mean_53 = reduce_mean(axes = mean_53_axes_0, keep_dims = mean_53_keep_dims_0, x = x_123)[name = string("mean_53")]; tensor input_115 = sub(x = x_123, y = mean_53)[name = string("input_115")]; tensor var_4212_axes_0 = const()[name = string("op_4212_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881144128)))]; fp16 var_4200_to_fp16 = const()[name = string("op_4200_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4212_cast_fp16 = layer_norm(axes = var_4212_axes_0, epsilon = var_4200_to_fp16, gamma = model_model_layers_6_self_attn_k_norm_weight_to_fp16, x = input_115)[name = string("op_4212_cast_fp16")]; tensor var_4227 = mul(x = var_4194_cast_fp16, y = cos_5)[name = string("op_4227")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_4194_cast_fp16)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_4194_cast_fp16)[name = string("x2_25")]; fp16 const_139_promoted = const()[name = string("const_139_promoted"), val = fp16(-0x1p+0)]; tensor var_4248 = mul(x = x2_25, y = const_139_promoted)[name = string("op_4248")]; int32 var_4250 = const()[name = string("op_4250"), val = int32(-1)]; bool var_4251_interleave_0 = const()[name = string("op_4251_interleave_0"), val = bool(false)]; tensor var_4251 = concat(axis = var_4250, interleave = var_4251_interleave_0, values = (var_4248, x1_25))[name = string("op_4251")]; tensor var_4252 = mul(x = var_4251, y = sin_5)[name = string("op_4252")]; tensor query_states_51 = add(x = var_4227, y = var_4252)[name = string("query_states_51")]; tensor var_4255 = mul(x = var_4212_cast_fp16, y = cos_5)[name = string("op_4255")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_4212_cast_fp16)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_4212_cast_fp16)[name = string("x2_27")]; fp16 const_142_promoted = const()[name = string("const_142_promoted"), val = fp16(-0x1p+0)]; tensor var_4276 = mul(x = x2_27, y = const_142_promoted)[name = string("op_4276")]; int32 var_4278 = const()[name = string("op_4278"), val = int32(-1)]; bool var_4279_interleave_0 = const()[name = string("op_4279_interleave_0"), val = bool(false)]; tensor var_4279 = concat(axis = var_4278, interleave = var_4279_interleave_0, values = (var_4276, x1_27))[name = string("op_4279")]; tensor var_4280 = mul(x = var_4279, y = sin_5)[name = string("op_4280")]; tensor key_states_63 = add(x = var_4255, y = var_4280)[name = string("key_states_63")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_1230, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = key_states_63, x = coreml_update_state_47)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_48")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([42])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([43])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_1230, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51 = transpose(perm = var_4177, x = var_4172)[name = string("transpose_104")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_51, x = coreml_update_state_48)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_49")]; tensor var_4351_begin_0 = const()[name = string("op_4351_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_4351_end_0 = const()[name = string("op_4351_end_0"), val = tensor([7, 8, 1024, 128])]; tensor var_4351_end_mask_0 = const()[name = string("op_4351_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4351_cast_fp16 = slice_by_index(begin = var_4351_begin_0, end = var_4351_end_0, end_mask = var_4351_end_mask_0, x = coreml_update_state_49)[name = string("op_4351_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_4351_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_4358_begin_0 = const()[name = string("op_4358_begin_0"), val = tensor([42, 0, 0, 0])]; tensor var_4358_end_0 = const()[name = string("op_4358_end_0"), val = tensor([43, 8, 1024, 128])]; tensor var_4358_end_mask_0 = const()[name = string("op_4358_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4358_cast_fp16 = slice_by_index(begin = var_4358_begin_0, end = var_4358_end_0, end_mask = var_4358_end_mask_0, x = coreml_update_state_49)[name = string("op_4358_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_4358_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_127_axes_0 = const()[name = string("x_127_axes_0"), val = tensor([1])]; tensor x_127_cast_fp16 = expand_dims(axes = x_127_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_4387 = const()[name = string("op_4387"), val = tensor([1, 4, 1, 1])]; tensor x_129_cast_fp16 = tile(reps = var_4387, x = x_127_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_4399 = const()[name = string("op_4399"), val = tensor([1, -1, 1024, 128])]; tensor key_states_67_cast_fp16 = reshape(shape = var_4399, x = x_129_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor x_133_axes_0 = const()[name = string("x_133_axes_0"), val = tensor([1])]; tensor x_133_cast_fp16 = expand_dims(axes = x_133_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_4407 = const()[name = string("op_4407"), val = tensor([1, 4, 1, 1])]; tensor x_135_cast_fp16 = tile(reps = var_4407, x = x_133_cast_fp16)[name = string("x_135_cast_fp16")]; bool var_4434_transpose_x_0 = const()[name = string("op_4434_transpose_x_0"), val = bool(false)]; bool var_4434_transpose_y_0 = const()[name = string("op_4434_transpose_y_0"), val = bool(true)]; tensor var_4434 = matmul(transpose_x = var_4434_transpose_x_0, transpose_y = var_4434_transpose_y_0, x = query_states_51, y = key_states_67_cast_fp16)[name = string("op_4434")]; fp16 var_4435_to_fp16 = const()[name = string("op_4435_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_4434, y = var_4435_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_4470 = const()[name = string("op_4470"), val = int32(-1)]; tensor var_4472_cast_fp16 = softmax(axis = var_4470, x = attn_weights_27_cast_fp16)[name = string("op_4472_cast_fp16")]; tensor concat_120 = const()[name = string("concat_120"), val = tensor([32, 64, 1024])]; tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_4472_cast_fp16)[name = string("reshape_18_cast_fp16")]; tensor concat_121 = const()[name = string("concat_121"), val = tensor([32, 1024, 128])]; tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_135_cast_fp16)[name = string("reshape_19_cast_fp16")]; bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 32, 64, 128])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor var_4484_perm_0 = const()[name = string("op_4484_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4503 = const()[name = string("op_4503"), val = tensor([1, 64, 4096])]; tensor var_4484_cast_fp16 = transpose(perm = var_4484_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_103")]; tensor attn_output_65_cast_fp16 = reshape(shape = var_4503, x = var_4484_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_4508 = const()[name = string("op_4508"), val = tensor([0, 2, 1])]; string var_4524_pad_type_0 = const()[name = string("op_4524_pad_type_0"), val = string("valid")]; int32 var_4524_groups_0 = const()[name = string("op_4524_groups_0"), val = int32(1)]; tensor var_4524_strides_0 = const()[name = string("op_4524_strides_0"), val = tensor([1])]; tensor var_4524_pad_0 = const()[name = string("op_4524_pad_0"), val = tensor([0, 0])]; tensor var_4524_dilations_0 = const()[name = string("op_4524_dilations_0"), val = tensor([1])]; tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881144448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886387392))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4509_cast_fp16 = transpose(perm = var_4508, x = attn_output_65_cast_fp16)[name = string("transpose_102")]; tensor var_4524_cast_fp16 = conv(dilations = var_4524_dilations_0, groups = var_4524_groups_0, pad = var_4524_pad_0, pad_type = var_4524_pad_type_0, strides = var_4524_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4509_cast_fp16)[name = string("op_4524_cast_fp16")]; tensor var_4528 = const()[name = string("op_4528"), val = tensor([0, 2, 1])]; tensor attn_output_69_cast_fp16 = transpose(perm = var_4528, x = var_4524_cast_fp16)[name = string("transpose_101")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor mean_55_axes_0 = const()[name = string("mean_55_axes_0"), val = tensor([-1])]; bool mean_55_keep_dims_0 = const()[name = string("mean_55_keep_dims_0"), val = bool(true)]; tensor mean_55_cast_fp16 = reduce_mean(axes = mean_55_axes_0, keep_dims = mean_55_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_55_cast_fp16")]; tensor input_119_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_55_cast_fp16)[name = string("input_119_cast_fp16")]; tensor var_4547_axes_0 = const()[name = string("op_4547_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886397696)))]; fp16 var_4535_to_fp16 = const()[name = string("op_4535_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4547_cast_fp16 = layer_norm(axes = var_4547_axes_0, epsilon = var_4535_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_4547_cast_fp16")]; tensor var_4561 = const()[name = string("op_4561"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_4562 = transpose(perm = var_4561, x = var_4547_cast_fp16)[name = string("transpose_100")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_4562)[name = string("input_121")]; string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_121)[name = string("b_13")]; tensor c_13 = silu(x = input_123)[name = string("c_13")]; tensor input_125 = mul(x = c_13, y = b_13)[name = string("input_125")]; string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; tensor e_13 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_125)[name = string("e_13")]; tensor var_4584_axes_0 = const()[name = string("op_4584_axes_0"), val = tensor([2])]; tensor var_4584 = squeeze(axes = var_4584_axes_0, x = e_13)[name = string("op_4584")]; tensor var_4585 = const()[name = string("op_4585"), val = tensor([0, 2, 1])]; tensor var_4586 = transpose(perm = var_4585, x = var_4584)[name = string("transpose_99")]; tensor hidden_states_43_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = var_4586)[name = string("hidden_states_43_cast_fp16")]; tensor mean_57_axes_0 = const()[name = string("mean_57_axes_0"), val = tensor([-1])]; bool mean_57_keep_dims_0 = const()[name = string("mean_57_keep_dims_0"), val = bool(true)]; tensor mean_57_cast_fp16 = reduce_mean(axes = mean_57_axes_0, keep_dims = mean_57_keep_dims_0, x = hidden_states_43_cast_fp16)[name = string("mean_57_cast_fp16")]; tensor input_127_cast_fp16 = sub(x = hidden_states_43_cast_fp16, y = mean_57_cast_fp16)[name = string("input_127_cast_fp16")]; tensor var_4604_axes_0 = const()[name = string("op_4604_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886402880)))]; fp16 var_4592_to_fp16 = const()[name = string("op_4592_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4604_cast_fp16 = layer_norm(axes = var_4604_axes_0, epsilon = var_4592_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_127_cast_fp16)[name = string("op_4604_cast_fp16")]; tensor var_4616 = const()[name = string("op_4616"), val = tensor([0, 2, 1])]; tensor var_4619_axes_0 = const()[name = string("op_4619_axes_0"), val = tensor([2])]; tensor var_4617 = transpose(perm = var_4616, x = var_4604_cast_fp16)[name = string("transpose_98")]; tensor var_4619 = expand_dims(axes = var_4619_axes_0, x = var_4617)[name = string("op_4619")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_4619)[name = string("query_states_57")]; string key_states_71_pad_type_0 = const()[name = string("key_states_71_pad_type_0"), val = string("valid")]; tensor key_states_71_strides_0 = const()[name = string("key_states_71_strides_0"), val = tensor([1, 1])]; tensor key_states_71_pad_0 = const()[name = string("key_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_71_dilations_0 = const()[name = string("key_states_71_dilations_0"), val = tensor([1, 1])]; int32 key_states_71_groups_0 = const()[name = string("key_states_71_groups_0"), val = int32(1)]; tensor key_states_71 = conv(dilations = key_states_71_dilations_0, groups = key_states_71_groups_0, pad = key_states_71_pad_0, pad_type = key_states_71_pad_type_0, strides = key_states_71_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_4619)[name = string("key_states_71")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor value_states_57 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_4619)[name = string("value_states_57")]; tensor var_4661 = const()[name = string("op_4661"), val = tensor([1, 32, 128, 64])]; tensor var_4662 = reshape(shape = var_4661, x = query_states_57)[name = string("op_4662")]; tensor var_4667 = const()[name = string("op_4667"), val = tensor([0, 1, 3, 2])]; tensor var_4672 = const()[name = string("op_4672"), val = tensor([1, 8, 128, 64])]; tensor var_4673 = reshape(shape = var_4672, x = key_states_71)[name = string("op_4673")]; tensor var_4678 = const()[name = string("op_4678"), val = tensor([0, 1, 3, 2])]; tensor var_4683 = const()[name = string("op_4683"), val = tensor([1, 8, 128, 64])]; tensor var_4684 = reshape(shape = var_4683, x = value_states_57)[name = string("op_4684")]; tensor var_4689 = const()[name = string("op_4689"), val = tensor([0, 1, 3, 2])]; tensor mean_59_axes_0 = const()[name = string("mean_59_axes_0"), val = tensor([-1])]; bool mean_59_keep_dims_0 = const()[name = string("mean_59_keep_dims_0"), val = bool(true)]; tensor x_141 = transpose(perm = var_4667, x = var_4662)[name = string("transpose_97")]; tensor mean_59 = reduce_mean(axes = mean_59_axes_0, keep_dims = mean_59_keep_dims_0, x = x_141)[name = string("mean_59")]; tensor input_131 = sub(x = x_141, y = mean_59)[name = string("input_131")]; tensor var_4706_axes_0 = const()[name = string("op_4706_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408064)))]; fp16 var_4694_to_fp16 = const()[name = string("op_4694_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4706_cast_fp16 = layer_norm(axes = var_4706_axes_0, epsilon = var_4694_to_fp16, gamma = model_model_layers_7_self_attn_q_norm_weight_to_fp16, x = input_131)[name = string("op_4706_cast_fp16")]; tensor mean_61_axes_0 = const()[name = string("mean_61_axes_0"), val = tensor([-1])]; bool mean_61_keep_dims_0 = const()[name = string("mean_61_keep_dims_0"), val = bool(true)]; tensor x_143 = transpose(perm = var_4678, x = var_4673)[name = string("transpose_96")]; tensor mean_61 = reduce_mean(axes = mean_61_axes_0, keep_dims = mean_61_keep_dims_0, x = x_143)[name = string("mean_61")]; tensor input_133 = sub(x = x_143, y = mean_61)[name = string("input_133")]; tensor var_4724_axes_0 = const()[name = string("op_4724_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408384)))]; fp16 var_4712_to_fp16 = const()[name = string("op_4712_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4724_cast_fp16 = layer_norm(axes = var_4724_axes_0, epsilon = var_4712_to_fp16, gamma = model_model_layers_7_self_attn_k_norm_weight_to_fp16, x = input_133)[name = string("op_4724_cast_fp16")]; tensor var_4739 = mul(x = var_4706_cast_fp16, y = cos_5)[name = string("op_4739")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_4706_cast_fp16)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_4706_cast_fp16)[name = string("x2_29")]; fp16 const_161_promoted = const()[name = string("const_161_promoted"), val = fp16(-0x1p+0)]; tensor var_4760 = mul(x = x2_29, y = const_161_promoted)[name = string("op_4760")]; int32 var_4762 = const()[name = string("op_4762"), val = int32(-1)]; bool var_4763_interleave_0 = const()[name = string("op_4763_interleave_0"), val = bool(false)]; tensor var_4763 = concat(axis = var_4762, interleave = var_4763_interleave_0, values = (var_4760, x1_29))[name = string("op_4763")]; tensor var_4764 = mul(x = var_4763, y = sin_5)[name = string("op_4764")]; tensor query_states_59 = add(x = var_4739, y = var_4764)[name = string("query_states_59")]; tensor var_4767 = mul(x = var_4724_cast_fp16, y = cos_5)[name = string("op_4767")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_4724_cast_fp16)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_4724_cast_fp16)[name = string("x2_31")]; fp16 const_164_promoted = const()[name = string("const_164_promoted"), val = fp16(-0x1p+0)]; tensor var_4788 = mul(x = x2_31, y = const_164_promoted)[name = string("op_4788")]; int32 var_4790 = const()[name = string("op_4790"), val = int32(-1)]; bool var_4791_interleave_0 = const()[name = string("op_4791_interleave_0"), val = bool(false)]; tensor var_4791 = concat(axis = var_4790, interleave = var_4791_interleave_0, values = (var_4788, x1_31))[name = string("op_4791")]; tensor var_4792 = mul(x = var_4791, y = sin_5)[name = string("op_4792")]; tensor key_states_73 = add(x = var_4767, y = var_4792)[name = string("key_states_73")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_1230, concat_129_values3_0))[name = string("concat_129")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = key_states_73, x = coreml_update_state_49)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_50")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([43])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([44])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_1230, concat_133_values3_0))[name = string("concat_133")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59 = transpose(perm = var_4689, x = var_4684)[name = string("transpose_95")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_59, x = coreml_update_state_50)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_51")]; tensor var_4863_begin_0 = const()[name = string("op_4863_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_4863_end_0 = const()[name = string("op_4863_end_0"), val = tensor([8, 8, 1024, 128])]; tensor var_4863_end_mask_0 = const()[name = string("op_4863_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4863_cast_fp16 = slice_by_index(begin = var_4863_begin_0, end = var_4863_end_0, end_mask = var_4863_end_mask_0, x = coreml_update_state_51)[name = string("op_4863_cast_fp16")]; tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_4863_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; tensor var_4870_begin_0 = const()[name = string("op_4870_begin_0"), val = tensor([43, 0, 0, 0])]; tensor var_4870_end_0 = const()[name = string("op_4870_end_0"), val = tensor([44, 8, 1024, 128])]; tensor var_4870_end_mask_0 = const()[name = string("op_4870_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4870_cast_fp16 = slice_by_index(begin = var_4870_begin_0, end = var_4870_end_0, end_mask = var_4870_end_mask_0, x = coreml_update_state_51)[name = string("op_4870_cast_fp16")]; tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_4870_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; tensor x_147_axes_0 = const()[name = string("x_147_axes_0"), val = tensor([1])]; tensor x_147_cast_fp16 = expand_dims(axes = x_147_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_147_cast_fp16")]; tensor var_4899 = const()[name = string("op_4899"), val = tensor([1, 4, 1, 1])]; tensor x_149_cast_fp16 = tile(reps = var_4899, x = x_147_cast_fp16)[name = string("x_149_cast_fp16")]; tensor var_4911 = const()[name = string("op_4911"), val = tensor([1, -1, 1024, 128])]; tensor key_states_77_cast_fp16 = reshape(shape = var_4911, x = x_149_cast_fp16)[name = string("key_states_77_cast_fp16")]; tensor x_153_axes_0 = const()[name = string("x_153_axes_0"), val = tensor([1])]; tensor x_153_cast_fp16 = expand_dims(axes = x_153_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_4919 = const()[name = string("op_4919"), val = tensor([1, 4, 1, 1])]; tensor x_155_cast_fp16 = tile(reps = var_4919, x = x_153_cast_fp16)[name = string("x_155_cast_fp16")]; bool var_4946_transpose_x_0 = const()[name = string("op_4946_transpose_x_0"), val = bool(false)]; bool var_4946_transpose_y_0 = const()[name = string("op_4946_transpose_y_0"), val = bool(true)]; tensor var_4946 = matmul(transpose_x = var_4946_transpose_x_0, transpose_y = var_4946_transpose_y_0, x = query_states_59, y = key_states_77_cast_fp16)[name = string("op_4946")]; fp16 var_4947_to_fp16 = const()[name = string("op_4947_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_29_cast_fp16 = mul(x = var_4946, y = var_4947_to_fp16)[name = string("attn_weights_29_cast_fp16")]; tensor attn_weights_31_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("attn_weights_31_cast_fp16")]; int32 var_4982 = const()[name = string("op_4982"), val = int32(-1)]; tensor var_4984_cast_fp16 = softmax(axis = var_4982, x = attn_weights_31_cast_fp16)[name = string("op_4984_cast_fp16")]; tensor concat_138 = const()[name = string("concat_138"), val = tensor([32, 64, 1024])]; tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_4984_cast_fp16)[name = string("reshape_21_cast_fp16")]; tensor concat_139 = const()[name = string("concat_139"), val = tensor([32, 1024, 128])]; tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_155_cast_fp16)[name = string("reshape_22_cast_fp16")]; bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 32, 64, 128])]; tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor var_4996_perm_0 = const()[name = string("op_4996_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5015 = const()[name = string("op_5015"), val = tensor([1, 64, 4096])]; tensor var_4996_cast_fp16 = transpose(perm = var_4996_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_94")]; tensor attn_output_75_cast_fp16 = reshape(shape = var_5015, x = var_4996_cast_fp16)[name = string("attn_output_75_cast_fp16")]; tensor var_5020 = const()[name = string("op_5020"), val = tensor([0, 2, 1])]; string var_5036_pad_type_0 = const()[name = string("op_5036_pad_type_0"), val = string("valid")]; int32 var_5036_groups_0 = const()[name = string("op_5036_groups_0"), val = int32(1)]; tensor var_5036_strides_0 = const()[name = string("op_5036_strides_0"), val = tensor([1])]; tensor var_5036_pad_0 = const()[name = string("op_5036_pad_0"), val = tensor([0, 0])]; tensor var_5036_dilations_0 = const()[name = string("op_5036_dilations_0"), val = tensor([1])]; tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891651648))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5021_cast_fp16 = transpose(perm = var_5020, x = attn_output_75_cast_fp16)[name = string("transpose_93")]; tensor var_5036_cast_fp16 = conv(dilations = var_5036_dilations_0, groups = var_5036_groups_0, pad = var_5036_pad_0, pad_type = var_5036_pad_type_0, strides = var_5036_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5021_cast_fp16)[name = string("op_5036_cast_fp16")]; tensor var_5040 = const()[name = string("op_5040"), val = tensor([0, 2, 1])]; tensor attn_output_79_cast_fp16 = transpose(perm = var_5040, x = var_5036_cast_fp16)[name = string("transpose_92")]; tensor hidden_states_47_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor mean_63_axes_0 = const()[name = string("mean_63_axes_0"), val = tensor([-1])]; bool mean_63_keep_dims_0 = const()[name = string("mean_63_keep_dims_0"), val = bool(true)]; tensor mean_63_cast_fp16 = reduce_mean(axes = mean_63_axes_0, keep_dims = mean_63_keep_dims_0, x = hidden_states_47_cast_fp16)[name = string("mean_63_cast_fp16")]; tensor input_137_cast_fp16 = sub(x = hidden_states_47_cast_fp16, y = mean_63_cast_fp16)[name = string("input_137_cast_fp16")]; tensor var_5059_axes_0 = const()[name = string("op_5059_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891661952)))]; fp16 var_5047_to_fp16 = const()[name = string("op_5047_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5059_cast_fp16 = layer_norm(axes = var_5059_axes_0, epsilon = var_5047_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_137_cast_fp16)[name = string("op_5059_cast_fp16")]; tensor var_5073 = const()[name = string("op_5073"), val = tensor([0, 2, 1])]; tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; tensor var_5074 = transpose(perm = var_5073, x = var_5059_cast_fp16)[name = string("transpose_91")]; tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_5074)[name = string("input_139")]; string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; tensor input_141 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_139)[name = string("input_141")]; string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_139)[name = string("b_15")]; tensor c_15 = silu(x = input_141)[name = string("c_15")]; tensor input_143 = mul(x = c_15, y = b_15)[name = string("input_143")]; string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; tensor e_15 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_143)[name = string("e_15")]; tensor var_5096_axes_0 = const()[name = string("op_5096_axes_0"), val = tensor([2])]; tensor var_5096 = squeeze(axes = var_5096_axes_0, x = e_15)[name = string("op_5096")]; tensor var_5097 = const()[name = string("op_5097"), val = tensor([0, 2, 1])]; tensor var_5098 = transpose(perm = var_5097, x = var_5096)[name = string("transpose_90")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = var_5098)[name = string("hidden_states_49_cast_fp16")]; tensor mean_65_axes_0 = const()[name = string("mean_65_axes_0"), val = tensor([-1])]; bool mean_65_keep_dims_0 = const()[name = string("mean_65_keep_dims_0"), val = bool(true)]; tensor mean_65_cast_fp16 = reduce_mean(axes = mean_65_axes_0, keep_dims = mean_65_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_65_cast_fp16")]; tensor input_145_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_65_cast_fp16)[name = string("input_145_cast_fp16")]; tensor var_5116_axes_0 = const()[name = string("op_5116_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891667136)))]; fp16 var_5104_to_fp16 = const()[name = string("op_5104_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5116_cast_fp16 = layer_norm(axes = var_5116_axes_0, epsilon = var_5104_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_145_cast_fp16)[name = string("op_5116_cast_fp16")]; tensor var_5128 = const()[name = string("op_5128"), val = tensor([0, 2, 1])]; tensor var_5131_axes_0 = const()[name = string("op_5131_axes_0"), val = tensor([2])]; tensor var_5129 = transpose(perm = var_5128, x = var_5116_cast_fp16)[name = string("transpose_89")]; tensor var_5131 = expand_dims(axes = var_5131_axes_0, x = var_5129)[name = string("op_5131")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor query_states_65 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_5131)[name = string("query_states_65")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor key_states_81 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_5131)[name = string("key_states_81")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor value_states_65 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_5131)[name = string("value_states_65")]; tensor var_5173 = const()[name = string("op_5173"), val = tensor([1, 32, 128, 64])]; tensor var_5174 = reshape(shape = var_5173, x = query_states_65)[name = string("op_5174")]; tensor var_5179 = const()[name = string("op_5179"), val = tensor([0, 1, 3, 2])]; tensor var_5184 = const()[name = string("op_5184"), val = tensor([1, 8, 128, 64])]; tensor var_5185 = reshape(shape = var_5184, x = key_states_81)[name = string("op_5185")]; tensor var_5190 = const()[name = string("op_5190"), val = tensor([0, 1, 3, 2])]; tensor var_5195 = const()[name = string("op_5195"), val = tensor([1, 8, 128, 64])]; tensor var_5196 = reshape(shape = var_5195, x = value_states_65)[name = string("op_5196")]; tensor var_5201 = const()[name = string("op_5201"), val = tensor([0, 1, 3, 2])]; tensor mean_67_axes_0 = const()[name = string("mean_67_axes_0"), val = tensor([-1])]; bool mean_67_keep_dims_0 = const()[name = string("mean_67_keep_dims_0"), val = bool(true)]; tensor x_161 = transpose(perm = var_5179, x = var_5174)[name = string("transpose_88")]; tensor mean_67 = reduce_mean(axes = mean_67_axes_0, keep_dims = mean_67_keep_dims_0, x = x_161)[name = string("mean_67")]; tensor input_149 = sub(x = x_161, y = mean_67)[name = string("input_149")]; tensor var_5218_axes_0 = const()[name = string("op_5218_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672320)))]; fp16 var_5206_to_fp16 = const()[name = string("op_5206_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5218_cast_fp16 = layer_norm(axes = var_5218_axes_0, epsilon = var_5206_to_fp16, gamma = model_model_layers_8_self_attn_q_norm_weight_to_fp16, x = input_149)[name = string("op_5218_cast_fp16")]; tensor mean_69_axes_0 = const()[name = string("mean_69_axes_0"), val = tensor([-1])]; bool mean_69_keep_dims_0 = const()[name = string("mean_69_keep_dims_0"), val = bool(true)]; tensor x_163 = transpose(perm = var_5190, x = var_5185)[name = string("transpose_87")]; tensor mean_69 = reduce_mean(axes = mean_69_axes_0, keep_dims = mean_69_keep_dims_0, x = x_163)[name = string("mean_69")]; tensor input_151 = sub(x = x_163, y = mean_69)[name = string("input_151")]; tensor var_5236_axes_0 = const()[name = string("op_5236_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672640)))]; fp16 var_5224_to_fp16 = const()[name = string("op_5224_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5236_cast_fp16 = layer_norm(axes = var_5236_axes_0, epsilon = var_5224_to_fp16, gamma = model_model_layers_8_self_attn_k_norm_weight_to_fp16, x = input_151)[name = string("op_5236_cast_fp16")]; tensor var_5251 = mul(x = var_5218_cast_fp16, y = cos_5)[name = string("op_5251")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_5218_cast_fp16)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_5218_cast_fp16)[name = string("x2_33")]; fp16 const_183_promoted = const()[name = string("const_183_promoted"), val = fp16(-0x1p+0)]; tensor var_5272 = mul(x = x2_33, y = const_183_promoted)[name = string("op_5272")]; int32 var_5274 = const()[name = string("op_5274"), val = int32(-1)]; bool var_5275_interleave_0 = const()[name = string("op_5275_interleave_0"), val = bool(false)]; tensor var_5275 = concat(axis = var_5274, interleave = var_5275_interleave_0, values = (var_5272, x1_33))[name = string("op_5275")]; tensor var_5276 = mul(x = var_5275, y = sin_5)[name = string("op_5276")]; tensor query_states_67 = add(x = var_5251, y = var_5276)[name = string("query_states_67")]; tensor var_5279 = mul(x = var_5236_cast_fp16, y = cos_5)[name = string("op_5279")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_5236_cast_fp16)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_5236_cast_fp16)[name = string("x2_35")]; fp16 const_186_promoted = const()[name = string("const_186_promoted"), val = fp16(-0x1p+0)]; tensor var_5300 = mul(x = x2_35, y = const_186_promoted)[name = string("op_5300")]; int32 var_5302 = const()[name = string("op_5302"), val = int32(-1)]; bool var_5303_interleave_0 = const()[name = string("op_5303_interleave_0"), val = bool(false)]; tensor var_5303 = concat(axis = var_5302, interleave = var_5303_interleave_0, values = (var_5300, x1_35))[name = string("op_5303")]; tensor var_5304 = mul(x = var_5303, y = sin_5)[name = string("op_5304")]; tensor key_states_83 = add(x = var_5279, y = var_5304)[name = string("key_states_83")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, var_1230, concat_147_values3_0))[name = string("concat_147")]; tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = key_states_83, x = coreml_update_state_51)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_52")]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([44])]; tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([45])]; int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, var_1230, concat_151_values3_0))[name = string("concat_151")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67 = transpose(perm = var_5201, x = var_5196)[name = string("transpose_86")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = value_states_67, x = coreml_update_state_52)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_53")]; tensor var_5375_begin_0 = const()[name = string("op_5375_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_5375_end_0 = const()[name = string("op_5375_end_0"), val = tensor([9, 8, 1024, 128])]; tensor var_5375_end_mask_0 = const()[name = string("op_5375_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5375_cast_fp16 = slice_by_index(begin = var_5375_begin_0, end = var_5375_end_0, end_mask = var_5375_end_mask_0, x = coreml_update_state_53)[name = string("op_5375_cast_fp16")]; tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_5375_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; tensor var_5382_begin_0 = const()[name = string("op_5382_begin_0"), val = tensor([44, 0, 0, 0])]; tensor var_5382_end_0 = const()[name = string("op_5382_end_0"), val = tensor([45, 8, 1024, 128])]; tensor var_5382_end_mask_0 = const()[name = string("op_5382_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5382_cast_fp16 = slice_by_index(begin = var_5382_begin_0, end = var_5382_end_0, end_mask = var_5382_end_mask_0, x = coreml_update_state_53)[name = string("op_5382_cast_fp16")]; tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_5382_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; tensor x_167_axes_0 = const()[name = string("x_167_axes_0"), val = tensor([1])]; tensor x_167_cast_fp16 = expand_dims(axes = x_167_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_167_cast_fp16")]; tensor var_5411 = const()[name = string("op_5411"), val = tensor([1, 4, 1, 1])]; tensor x_169_cast_fp16 = tile(reps = var_5411, x = x_167_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_5423 = const()[name = string("op_5423"), val = tensor([1, -1, 1024, 128])]; tensor key_states_87_cast_fp16 = reshape(shape = var_5423, x = x_169_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor x_173_axes_0 = const()[name = string("x_173_axes_0"), val = tensor([1])]; tensor x_173_cast_fp16 = expand_dims(axes = x_173_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_173_cast_fp16")]; tensor var_5431 = const()[name = string("op_5431"), val = tensor([1, 4, 1, 1])]; tensor x_175_cast_fp16 = tile(reps = var_5431, x = x_173_cast_fp16)[name = string("x_175_cast_fp16")]; bool var_5458_transpose_x_0 = const()[name = string("op_5458_transpose_x_0"), val = bool(false)]; bool var_5458_transpose_y_0 = const()[name = string("op_5458_transpose_y_0"), val = bool(true)]; tensor var_5458 = matmul(transpose_x = var_5458_transpose_x_0, transpose_y = var_5458_transpose_y_0, x = query_states_67, y = key_states_87_cast_fp16)[name = string("op_5458")]; fp16 var_5459_to_fp16 = const()[name = string("op_5459_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_33_cast_fp16 = mul(x = var_5458, y = var_5459_to_fp16)[name = string("attn_weights_33_cast_fp16")]; tensor attn_weights_35_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("attn_weights_35_cast_fp16")]; int32 var_5494 = const()[name = string("op_5494"), val = int32(-1)]; tensor var_5496_cast_fp16 = softmax(axis = var_5494, x = attn_weights_35_cast_fp16)[name = string("op_5496_cast_fp16")]; tensor concat_156 = const()[name = string("concat_156"), val = tensor([32, 64, 1024])]; tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_5496_cast_fp16)[name = string("reshape_24_cast_fp16")]; tensor concat_157 = const()[name = string("concat_157"), val = tensor([32, 1024, 128])]; tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_175_cast_fp16)[name = string("reshape_25_cast_fp16")]; bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 32, 64, 128])]; tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; tensor var_5508_perm_0 = const()[name = string("op_5508_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5527 = const()[name = string("op_5527"), val = tensor([1, 64, 4096])]; tensor var_5508_cast_fp16 = transpose(perm = var_5508_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_85")]; tensor attn_output_85_cast_fp16 = reshape(shape = var_5527, x = var_5508_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_5532 = const()[name = string("op_5532"), val = tensor([0, 2, 1])]; string var_5548_pad_type_0 = const()[name = string("op_5548_pad_type_0"), val = string("valid")]; int32 var_5548_groups_0 = const()[name = string("op_5548_groups_0"), val = int32(1)]; tensor var_5548_strides_0 = const()[name = string("op_5548_strides_0"), val = tensor([1])]; tensor var_5548_pad_0 = const()[name = string("op_5548_pad_0"), val = tensor([0, 0])]; tensor var_5548_dilations_0 = const()[name = string("op_5548_dilations_0"), val = tensor([1])]; tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896915904))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5533_cast_fp16 = transpose(perm = var_5532, x = attn_output_85_cast_fp16)[name = string("transpose_84")]; tensor var_5548_cast_fp16 = conv(dilations = var_5548_dilations_0, groups = var_5548_groups_0, pad = var_5548_pad_0, pad_type = var_5548_pad_type_0, strides = var_5548_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5533_cast_fp16)[name = string("op_5548_cast_fp16")]; tensor var_5552 = const()[name = string("op_5552"), val = tensor([0, 2, 1])]; tensor attn_output_89_cast_fp16 = transpose(perm = var_5552, x = var_5548_cast_fp16)[name = string("transpose_83")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_71_axes_0 = const()[name = string("mean_71_axes_0"), val = tensor([-1])]; bool mean_71_keep_dims_0 = const()[name = string("mean_71_keep_dims_0"), val = bool(true)]; tensor mean_71_cast_fp16 = reduce_mean(axes = mean_71_axes_0, keep_dims = mean_71_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_71_cast_fp16")]; tensor input_155_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_71_cast_fp16)[name = string("input_155_cast_fp16")]; tensor var_5571_axes_0 = const()[name = string("op_5571_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896926208)))]; fp16 var_5559_to_fp16 = const()[name = string("op_5559_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5571_cast_fp16 = layer_norm(axes = var_5571_axes_0, epsilon = var_5559_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_155_cast_fp16)[name = string("op_5571_cast_fp16")]; tensor var_5585 = const()[name = string("op_5585"), val = tensor([0, 2, 1])]; tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; tensor var_5586 = transpose(perm = var_5585, x = var_5571_cast_fp16)[name = string("transpose_82")]; tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_5586)[name = string("input_157")]; string input_159_pad_type_0 = const()[name = string("input_159_pad_type_0"), val = string("valid")]; tensor input_159_strides_0 = const()[name = string("input_159_strides_0"), val = tensor([1, 1])]; tensor input_159_pad_0 = const()[name = string("input_159_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_159_dilations_0 = const()[name = string("input_159_dilations_0"), val = tensor([1, 1])]; int32 input_159_groups_0 = const()[name = string("input_159_groups_0"), val = int32(1)]; tensor input_159 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_157)[name = string("input_159")]; string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_157)[name = string("b_17")]; tensor c_17 = silu(x = input_159)[name = string("c_17")]; tensor input_161 = mul(x = c_17, y = b_17)[name = string("input_161")]; string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; tensor e_17 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_161)[name = string("e_17")]; tensor var_5608_axes_0 = const()[name = string("op_5608_axes_0"), val = tensor([2])]; tensor var_5608 = squeeze(axes = var_5608_axes_0, x = e_17)[name = string("op_5608")]; tensor var_5609 = const()[name = string("op_5609"), val = tensor([0, 2, 1])]; tensor var_5610 = transpose(perm = var_5609, x = var_5608)[name = string("transpose_81")]; tensor hidden_states_55_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_5610)[name = string("hidden_states_55_cast_fp16")]; tensor mean_73_axes_0 = const()[name = string("mean_73_axes_0"), val = tensor([-1])]; bool mean_73_keep_dims_0 = const()[name = string("mean_73_keep_dims_0"), val = bool(true)]; tensor mean_73_cast_fp16 = reduce_mean(axes = mean_73_axes_0, keep_dims = mean_73_keep_dims_0, x = hidden_states_55_cast_fp16)[name = string("mean_73_cast_fp16")]; tensor input_163_cast_fp16 = sub(x = hidden_states_55_cast_fp16, y = mean_73_cast_fp16)[name = string("input_163_cast_fp16")]; tensor var_5628_axes_0 = const()[name = string("op_5628_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896931392)))]; fp16 var_5616_to_fp16 = const()[name = string("op_5616_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5628_cast_fp16 = layer_norm(axes = var_5628_axes_0, epsilon = var_5616_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_163_cast_fp16)[name = string("op_5628_cast_fp16")]; tensor var_5640 = const()[name = string("op_5640"), val = tensor([0, 2, 1])]; tensor var_5643_axes_0 = const()[name = string("op_5643_axes_0"), val = tensor([2])]; tensor var_5641 = transpose(perm = var_5640, x = var_5628_cast_fp16)[name = string("transpose_80")]; tensor var_5643 = expand_dims(axes = var_5643_axes_0, x = var_5641)[name = string("op_5643")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor query_states_73 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_5643)[name = string("query_states_73")]; string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_5643)[name = string("key_states_91")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_5643)[name = string("value_states_73")]; tensor var_5685 = const()[name = string("op_5685"), val = tensor([1, 32, 128, 64])]; tensor var_5686 = reshape(shape = var_5685, x = query_states_73)[name = string("op_5686")]; tensor var_5691 = const()[name = string("op_5691"), val = tensor([0, 1, 3, 2])]; tensor var_5696 = const()[name = string("op_5696"), val = tensor([1, 8, 128, 64])]; tensor var_5697 = reshape(shape = var_5696, x = key_states_91)[name = string("op_5697")]; tensor var_5702 = const()[name = string("op_5702"), val = tensor([0, 1, 3, 2])]; tensor var_5707 = const()[name = string("op_5707"), val = tensor([1, 8, 128, 64])]; tensor var_5708 = reshape(shape = var_5707, x = value_states_73)[name = string("op_5708")]; tensor var_5713 = const()[name = string("op_5713"), val = tensor([0, 1, 3, 2])]; tensor mean_75_axes_0 = const()[name = string("mean_75_axes_0"), val = tensor([-1])]; bool mean_75_keep_dims_0 = const()[name = string("mean_75_keep_dims_0"), val = bool(true)]; tensor x_181 = transpose(perm = var_5691, x = var_5686)[name = string("transpose_79")]; tensor mean_75 = reduce_mean(axes = mean_75_axes_0, keep_dims = mean_75_keep_dims_0, x = x_181)[name = string("mean_75")]; tensor input_167 = sub(x = x_181, y = mean_75)[name = string("input_167")]; tensor var_5730_axes_0 = const()[name = string("op_5730_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896936576)))]; fp16 var_5718_to_fp16 = const()[name = string("op_5718_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5730_cast_fp16 = layer_norm(axes = var_5730_axes_0, epsilon = var_5718_to_fp16, gamma = model_model_layers_9_self_attn_q_norm_weight_to_fp16, x = input_167)[name = string("op_5730_cast_fp16")]; tensor mean_77_axes_0 = const()[name = string("mean_77_axes_0"), val = tensor([-1])]; bool mean_77_keep_dims_0 = const()[name = string("mean_77_keep_dims_0"), val = bool(true)]; tensor x_183 = transpose(perm = var_5702, x = var_5697)[name = string("transpose_78")]; tensor mean_77 = reduce_mean(axes = mean_77_axes_0, keep_dims = mean_77_keep_dims_0, x = x_183)[name = string("mean_77")]; tensor input_169 = sub(x = x_183, y = mean_77)[name = string("input_169")]; tensor var_5748_axes_0 = const()[name = string("op_5748_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896936896)))]; fp16 var_5736_to_fp16 = const()[name = string("op_5736_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5748_cast_fp16 = layer_norm(axes = var_5748_axes_0, epsilon = var_5736_to_fp16, gamma = model_model_layers_9_self_attn_k_norm_weight_to_fp16, x = input_169)[name = string("op_5748_cast_fp16")]; tensor var_5763 = mul(x = var_5730_cast_fp16, y = cos_5)[name = string("op_5763")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_5730_cast_fp16)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_5730_cast_fp16)[name = string("x2_37")]; fp16 const_205_promoted = const()[name = string("const_205_promoted"), val = fp16(-0x1p+0)]; tensor var_5784 = mul(x = x2_37, y = const_205_promoted)[name = string("op_5784")]; int32 var_5786 = const()[name = string("op_5786"), val = int32(-1)]; bool var_5787_interleave_0 = const()[name = string("op_5787_interleave_0"), val = bool(false)]; tensor var_5787 = concat(axis = var_5786, interleave = var_5787_interleave_0, values = (var_5784, x1_37))[name = string("op_5787")]; tensor var_5788 = mul(x = var_5787, y = sin_5)[name = string("op_5788")]; tensor query_states_75 = add(x = var_5763, y = var_5788)[name = string("query_states_75")]; tensor var_5791 = mul(x = var_5748_cast_fp16, y = cos_5)[name = string("op_5791")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_5748_cast_fp16)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_5748_cast_fp16)[name = string("x2_39")]; fp16 const_208_promoted = const()[name = string("const_208_promoted"), val = fp16(-0x1p+0)]; tensor var_5812 = mul(x = x2_39, y = const_208_promoted)[name = string("op_5812")]; int32 var_5814 = const()[name = string("op_5814"), val = int32(-1)]; bool var_5815_interleave_0 = const()[name = string("op_5815_interleave_0"), val = bool(false)]; tensor var_5815 = concat(axis = var_5814, interleave = var_5815_interleave_0, values = (var_5812, x1_39))[name = string("op_5815")]; tensor var_5816 = mul(x = var_5815, y = sin_5)[name = string("op_5816")]; tensor key_states_93 = add(x = var_5791, y = var_5816)[name = string("key_states_93")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([9])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([10])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_164")]; tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (expand_dims_112, concat_165_values1_0, var_1230, concat_165_values3_0))[name = string("concat_165")]; tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_164, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_165, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = key_states_93, x = coreml_update_state_53)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_54")]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([45])]; tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([46])]; int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_168")]; tensor concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = tensor([0])]; tensor concat_169_values3_0 = const()[name = string("concat_169_values3_0"), val = tensor([0])]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (expand_dims_118, concat_169_values1_0, var_1230, concat_169_values3_0))[name = string("concat_169")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75 = transpose(perm = var_5713, x = var_5708)[name = string("transpose_77")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_168, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_169, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = value_states_75, x = coreml_update_state_54)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_55")]; tensor var_5887_begin_0 = const()[name = string("op_5887_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_5887_end_0 = const()[name = string("op_5887_end_0"), val = tensor([10, 8, 1024, 128])]; tensor var_5887_end_mask_0 = const()[name = string("op_5887_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5887_cast_fp16 = slice_by_index(begin = var_5887_begin_0, end = var_5887_end_0, end_mask = var_5887_end_mask_0, x = coreml_update_state_55)[name = string("op_5887_cast_fp16")]; tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_5887_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; tensor var_5894_begin_0 = const()[name = string("op_5894_begin_0"), val = tensor([45, 0, 0, 0])]; tensor var_5894_end_0 = const()[name = string("op_5894_end_0"), val = tensor([46, 8, 1024, 128])]; tensor var_5894_end_mask_0 = const()[name = string("op_5894_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5894_cast_fp16 = slice_by_index(begin = var_5894_begin_0, end = var_5894_end_0, end_mask = var_5894_end_mask_0, x = coreml_update_state_55)[name = string("op_5894_cast_fp16")]; tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_5894_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; tensor x_187_axes_0 = const()[name = string("x_187_axes_0"), val = tensor([1])]; tensor x_187_cast_fp16 = expand_dims(axes = x_187_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_5923 = const()[name = string("op_5923"), val = tensor([1, 4, 1, 1])]; tensor x_189_cast_fp16 = tile(reps = var_5923, x = x_187_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_5935 = const()[name = string("op_5935"), val = tensor([1, -1, 1024, 128])]; tensor key_states_97_cast_fp16 = reshape(shape = var_5935, x = x_189_cast_fp16)[name = string("key_states_97_cast_fp16")]; tensor x_193_axes_0 = const()[name = string("x_193_axes_0"), val = tensor([1])]; tensor x_193_cast_fp16 = expand_dims(axes = x_193_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_5943 = const()[name = string("op_5943"), val = tensor([1, 4, 1, 1])]; tensor x_195_cast_fp16 = tile(reps = var_5943, x = x_193_cast_fp16)[name = string("x_195_cast_fp16")]; bool var_5970_transpose_x_0 = const()[name = string("op_5970_transpose_x_0"), val = bool(false)]; bool var_5970_transpose_y_0 = const()[name = string("op_5970_transpose_y_0"), val = bool(true)]; tensor var_5970 = matmul(transpose_x = var_5970_transpose_x_0, transpose_y = var_5970_transpose_y_0, x = query_states_75, y = key_states_97_cast_fp16)[name = string("op_5970")]; fp16 var_5971_to_fp16 = const()[name = string("op_5971_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_5970, y = var_5971_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_6006 = const()[name = string("op_6006"), val = int32(-1)]; tensor var_6008_cast_fp16 = softmax(axis = var_6006, x = attn_weights_39_cast_fp16)[name = string("op_6008_cast_fp16")]; tensor concat_174 = const()[name = string("concat_174"), val = tensor([32, 64, 1024])]; tensor reshape_27_cast_fp16 = reshape(shape = concat_174, x = var_6008_cast_fp16)[name = string("reshape_27_cast_fp16")]; tensor concat_175 = const()[name = string("concat_175"), val = tensor([32, 1024, 128])]; tensor reshape_28_cast_fp16 = reshape(shape = concat_175, x = x_195_cast_fp16)[name = string("reshape_28_cast_fp16")]; bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; tensor concat_179 = const()[name = string("concat_179"), val = tensor([1, 32, 64, 128])]; tensor reshape_29_cast_fp16 = reshape(shape = concat_179, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; tensor var_6020_perm_0 = const()[name = string("op_6020_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6039 = const()[name = string("op_6039"), val = tensor([1, 64, 4096])]; tensor var_6020_cast_fp16 = transpose(perm = var_6020_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_76")]; tensor attn_output_95_cast_fp16 = reshape(shape = var_6039, x = var_6020_cast_fp16)[name = string("attn_output_95_cast_fp16")]; tensor var_6044 = const()[name = string("op_6044"), val = tensor([0, 2, 1])]; string var_6060_pad_type_0 = const()[name = string("op_6060_pad_type_0"), val = string("valid")]; int32 var_6060_groups_0 = const()[name = string("op_6060_groups_0"), val = int32(1)]; tensor var_6060_strides_0 = const()[name = string("op_6060_strides_0"), val = tensor([1])]; tensor var_6060_pad_0 = const()[name = string("op_6060_pad_0"), val = tensor([0, 0])]; tensor var_6060_dilations_0 = const()[name = string("op_6060_dilations_0"), val = tensor([1])]; tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896937216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902180160))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6045_cast_fp16 = transpose(perm = var_6044, x = attn_output_95_cast_fp16)[name = string("transpose_75")]; tensor var_6060_cast_fp16 = conv(dilations = var_6060_dilations_0, groups = var_6060_groups_0, pad = var_6060_pad_0, pad_type = var_6060_pad_type_0, strides = var_6060_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6045_cast_fp16)[name = string("op_6060_cast_fp16")]; tensor var_6064 = const()[name = string("op_6064"), val = tensor([0, 2, 1])]; tensor attn_output_99_cast_fp16 = transpose(perm = var_6064, x = var_6060_cast_fp16)[name = string("transpose_74")]; tensor hidden_states_59_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor mean_79_axes_0 = const()[name = string("mean_79_axes_0"), val = tensor([-1])]; bool mean_79_keep_dims_0 = const()[name = string("mean_79_keep_dims_0"), val = bool(true)]; tensor mean_79_cast_fp16 = reduce_mean(axes = mean_79_axes_0, keep_dims = mean_79_keep_dims_0, x = hidden_states_59_cast_fp16)[name = string("mean_79_cast_fp16")]; tensor input_173_cast_fp16 = sub(x = hidden_states_59_cast_fp16, y = mean_79_cast_fp16)[name = string("input_173_cast_fp16")]; tensor var_6083_axes_0 = const()[name = string("op_6083_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902190464)))]; fp16 var_6071_to_fp16 = const()[name = string("op_6071_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6083_cast_fp16 = layer_norm(axes = var_6083_axes_0, epsilon = var_6071_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_173_cast_fp16)[name = string("op_6083_cast_fp16")]; tensor var_6097 = const()[name = string("op_6097"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_6098 = transpose(perm = var_6097, x = var_6083_cast_fp16)[name = string("transpose_73")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_6098)[name = string("input_175")]; string input_177_pad_type_0 = const()[name = string("input_177_pad_type_0"), val = string("valid")]; tensor input_177_strides_0 = const()[name = string("input_177_strides_0"), val = tensor([1, 1])]; tensor input_177_pad_0 = const()[name = string("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_177_dilations_0 = const()[name = string("input_177_dilations_0"), val = tensor([1, 1])]; int32 input_177_groups_0 = const()[name = string("input_177_groups_0"), val = int32(1)]; tensor input_177 = conv(dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_175)[name = string("input_177")]; string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_175)[name = string("b_19")]; tensor c_19 = silu(x = input_177)[name = string("c_19")]; tensor input_179 = mul(x = c_19, y = b_19)[name = string("input_179")]; string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; tensor e_19 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_179)[name = string("e_19")]; tensor var_6120_axes_0 = const()[name = string("op_6120_axes_0"), val = tensor([2])]; tensor var_6120 = squeeze(axes = var_6120_axes_0, x = e_19)[name = string("op_6120")]; tensor var_6121 = const()[name = string("op_6121"), val = tensor([0, 2, 1])]; tensor var_6122 = transpose(perm = var_6121, x = var_6120)[name = string("transpose_72")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = var_6122)[name = string("hidden_states_61_cast_fp16")]; tensor mean_81_axes_0 = const()[name = string("mean_81_axes_0"), val = tensor([-1])]; bool mean_81_keep_dims_0 = const()[name = string("mean_81_keep_dims_0"), val = bool(true)]; tensor mean_81_cast_fp16 = reduce_mean(axes = mean_81_axes_0, keep_dims = mean_81_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_81_cast_fp16")]; tensor input_181_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_81_cast_fp16)[name = string("input_181_cast_fp16")]; tensor var_6140_axes_0 = const()[name = string("op_6140_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902195648)))]; fp16 var_6128_to_fp16 = const()[name = string("op_6128_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6140_cast_fp16 = layer_norm(axes = var_6140_axes_0, epsilon = var_6128_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_181_cast_fp16)[name = string("op_6140_cast_fp16")]; tensor var_6152 = const()[name = string("op_6152"), val = tensor([0, 2, 1])]; tensor var_6155_axes_0 = const()[name = string("op_6155_axes_0"), val = tensor([2])]; tensor var_6153 = transpose(perm = var_6152, x = var_6140_cast_fp16)[name = string("transpose_71")]; tensor var_6155 = expand_dims(axes = var_6155_axes_0, x = var_6153)[name = string("op_6155")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor query_states_81 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_6155)[name = string("query_states_81")]; string key_states_101_pad_type_0 = const()[name = string("key_states_101_pad_type_0"), val = string("valid")]; tensor key_states_101_strides_0 = const()[name = string("key_states_101_strides_0"), val = tensor([1, 1])]; tensor key_states_101_pad_0 = const()[name = string("key_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_101_dilations_0 = const()[name = string("key_states_101_dilations_0"), val = tensor([1, 1])]; int32 key_states_101_groups_0 = const()[name = string("key_states_101_groups_0"), val = int32(1)]; tensor key_states_101 = conv(dilations = key_states_101_dilations_0, groups = key_states_101_groups_0, pad = key_states_101_pad_0, pad_type = key_states_101_pad_type_0, strides = key_states_101_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_6155)[name = string("key_states_101")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor value_states_81 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_6155)[name = string("value_states_81")]; tensor var_6197 = const()[name = string("op_6197"), val = tensor([1, 32, 128, 64])]; tensor var_6198 = reshape(shape = var_6197, x = query_states_81)[name = string("op_6198")]; tensor var_6203 = const()[name = string("op_6203"), val = tensor([0, 1, 3, 2])]; tensor var_6208 = const()[name = string("op_6208"), val = tensor([1, 8, 128, 64])]; tensor var_6209 = reshape(shape = var_6208, x = key_states_101)[name = string("op_6209")]; tensor var_6214 = const()[name = string("op_6214"), val = tensor([0, 1, 3, 2])]; tensor var_6219 = const()[name = string("op_6219"), val = tensor([1, 8, 128, 64])]; tensor var_6220 = reshape(shape = var_6219, x = value_states_81)[name = string("op_6220")]; tensor var_6225 = const()[name = string("op_6225"), val = tensor([0, 1, 3, 2])]; tensor mean_83_axes_0 = const()[name = string("mean_83_axes_0"), val = tensor([-1])]; bool mean_83_keep_dims_0 = const()[name = string("mean_83_keep_dims_0"), val = bool(true)]; tensor x_201 = transpose(perm = var_6203, x = var_6198)[name = string("transpose_70")]; tensor mean_83 = reduce_mean(axes = mean_83_axes_0, keep_dims = mean_83_keep_dims_0, x = x_201)[name = string("mean_83")]; tensor input_185 = sub(x = x_201, y = mean_83)[name = string("input_185")]; tensor var_6242_axes_0 = const()[name = string("op_6242_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902200832)))]; fp16 var_6230_to_fp16 = const()[name = string("op_6230_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6242_cast_fp16 = layer_norm(axes = var_6242_axes_0, epsilon = var_6230_to_fp16, gamma = model_model_layers_10_self_attn_q_norm_weight_to_fp16, x = input_185)[name = string("op_6242_cast_fp16")]; tensor mean_85_axes_0 = const()[name = string("mean_85_axes_0"), val = tensor([-1])]; bool mean_85_keep_dims_0 = const()[name = string("mean_85_keep_dims_0"), val = bool(true)]; tensor x_203 = transpose(perm = var_6214, x = var_6209)[name = string("transpose_69")]; tensor mean_85 = reduce_mean(axes = mean_85_axes_0, keep_dims = mean_85_keep_dims_0, x = x_203)[name = string("mean_85")]; tensor input_187 = sub(x = x_203, y = mean_85)[name = string("input_187")]; tensor var_6260_axes_0 = const()[name = string("op_6260_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902201152)))]; fp16 var_6248_to_fp16 = const()[name = string("op_6248_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6260_cast_fp16 = layer_norm(axes = var_6260_axes_0, epsilon = var_6248_to_fp16, gamma = model_model_layers_10_self_attn_k_norm_weight_to_fp16, x = input_187)[name = string("op_6260_cast_fp16")]; tensor var_6275 = mul(x = var_6242_cast_fp16, y = cos_5)[name = string("op_6275")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_6242_cast_fp16)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_6242_cast_fp16)[name = string("x2_41")]; fp16 const_227_promoted = const()[name = string("const_227_promoted"), val = fp16(-0x1p+0)]; tensor var_6296 = mul(x = x2_41, y = const_227_promoted)[name = string("op_6296")]; int32 var_6298 = const()[name = string("op_6298"), val = int32(-1)]; bool var_6299_interleave_0 = const()[name = string("op_6299_interleave_0"), val = bool(false)]; tensor var_6299 = concat(axis = var_6298, interleave = var_6299_interleave_0, values = (var_6296, x1_41))[name = string("op_6299")]; tensor var_6300 = mul(x = var_6299, y = sin_5)[name = string("op_6300")]; tensor query_states_83 = add(x = var_6275, y = var_6300)[name = string("query_states_83")]; tensor var_6303 = mul(x = var_6260_cast_fp16, y = cos_5)[name = string("op_6303")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_6260_cast_fp16)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_6260_cast_fp16)[name = string("x2_43")]; fp16 const_230_promoted = const()[name = string("const_230_promoted"), val = fp16(-0x1p+0)]; tensor var_6324 = mul(x = x2_43, y = const_230_promoted)[name = string("op_6324")]; int32 var_6326 = const()[name = string("op_6326"), val = int32(-1)]; bool var_6327_interleave_0 = const()[name = string("op_6327_interleave_0"), val = bool(false)]; tensor var_6327 = concat(axis = var_6326, interleave = var_6327_interleave_0, values = (var_6324, x1_43))[name = string("op_6327")]; tensor var_6328 = mul(x = var_6327, y = sin_5)[name = string("op_6328")]; tensor key_states_103 = add(x = var_6303, y = var_6328)[name = string("key_states_103")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([10])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([11])]; int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_182")]; tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_124, concat_183_values1_0, var_1230, concat_183_values3_0))[name = string("concat_183")]; tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = key_states_103, x = coreml_update_state_55)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_56")]; tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([46])]; tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([47])]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_186")]; tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_130, concat_187_values1_0, var_1230, concat_187_values3_0))[name = string("concat_187")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83 = transpose(perm = var_6225, x = var_6220)[name = string("transpose_68")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = value_states_83, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_57")]; tensor var_6399_begin_0 = const()[name = string("op_6399_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_6399_end_0 = const()[name = string("op_6399_end_0"), val = tensor([11, 8, 1024, 128])]; tensor var_6399_end_mask_0 = const()[name = string("op_6399_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6399_cast_fp16 = slice_by_index(begin = var_6399_begin_0, end = var_6399_end_0, end_mask = var_6399_end_mask_0, x = coreml_update_state_57)[name = string("op_6399_cast_fp16")]; tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_6399_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; tensor var_6406_begin_0 = const()[name = string("op_6406_begin_0"), val = tensor([46, 0, 0, 0])]; tensor var_6406_end_0 = const()[name = string("op_6406_end_0"), val = tensor([47, 8, 1024, 128])]; tensor var_6406_end_mask_0 = const()[name = string("op_6406_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6406_cast_fp16 = slice_by_index(begin = var_6406_begin_0, end = var_6406_end_0, end_mask = var_6406_end_mask_0, x = coreml_update_state_57)[name = string("op_6406_cast_fp16")]; tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_6406_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_6435 = const()[name = string("op_6435"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_6435, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_6447 = const()[name = string("op_6447"), val = tensor([1, -1, 1024, 128])]; tensor key_states_107_cast_fp16 = reshape(shape = var_6447, x = x_209_cast_fp16)[name = string("key_states_107_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_6455 = const()[name = string("op_6455"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_6455, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; bool var_6482_transpose_x_0 = const()[name = string("op_6482_transpose_x_0"), val = bool(false)]; bool var_6482_transpose_y_0 = const()[name = string("op_6482_transpose_y_0"), val = bool(true)]; tensor var_6482 = matmul(transpose_x = var_6482_transpose_x_0, transpose_y = var_6482_transpose_y_0, x = query_states_83, y = key_states_107_cast_fp16)[name = string("op_6482")]; fp16 var_6483_to_fp16 = const()[name = string("op_6483_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_41_cast_fp16 = mul(x = var_6482, y = var_6483_to_fp16)[name = string("attn_weights_41_cast_fp16")]; tensor attn_weights_43_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask)[name = string("attn_weights_43_cast_fp16")]; int32 var_6518 = const()[name = string("op_6518"), val = int32(-1)]; tensor var_6520_cast_fp16 = softmax(axis = var_6518, x = attn_weights_43_cast_fp16)[name = string("op_6520_cast_fp16")]; tensor concat_192 = const()[name = string("concat_192"), val = tensor([32, 64, 1024])]; tensor reshape_30_cast_fp16 = reshape(shape = concat_192, x = var_6520_cast_fp16)[name = string("reshape_30_cast_fp16")]; tensor concat_193 = const()[name = string("concat_193"), val = tensor([32, 1024, 128])]; tensor reshape_31_cast_fp16 = reshape(shape = concat_193, x = x_215_cast_fp16)[name = string("reshape_31_cast_fp16")]; bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; tensor concat_197 = const()[name = string("concat_197"), val = tensor([1, 32, 64, 128])]; tensor reshape_32_cast_fp16 = reshape(shape = concat_197, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; tensor var_6532_perm_0 = const()[name = string("op_6532_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6551 = const()[name = string("op_6551"), val = tensor([1, 64, 4096])]; tensor var_6532_cast_fp16 = transpose(perm = var_6532_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_67")]; tensor attn_output_105_cast_fp16 = reshape(shape = var_6551, x = var_6532_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_6556 = const()[name = string("op_6556"), val = tensor([0, 2, 1])]; string var_6572_pad_type_0 = const()[name = string("op_6572_pad_type_0"), val = string("valid")]; int32 var_6572_groups_0 = const()[name = string("op_6572_groups_0"), val = int32(1)]; tensor var_6572_strides_0 = const()[name = string("op_6572_strides_0"), val = tensor([1])]; tensor var_6572_pad_0 = const()[name = string("op_6572_pad_0"), val = tensor([0, 0])]; tensor var_6572_dilations_0 = const()[name = string("op_6572_dilations_0"), val = tensor([1])]; tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902201472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907444416))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6557_cast_fp16 = transpose(perm = var_6556, x = attn_output_105_cast_fp16)[name = string("transpose_66")]; tensor var_6572_cast_fp16 = conv(dilations = var_6572_dilations_0, groups = var_6572_groups_0, pad = var_6572_pad_0, pad_type = var_6572_pad_type_0, strides = var_6572_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_6557_cast_fp16)[name = string("op_6572_cast_fp16")]; tensor var_6576 = const()[name = string("op_6576"), val = tensor([0, 2, 1])]; tensor attn_output_109_cast_fp16 = transpose(perm = var_6576, x = var_6572_cast_fp16)[name = string("transpose_65")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor mean_87_axes_0 = const()[name = string("mean_87_axes_0"), val = tensor([-1])]; bool mean_87_keep_dims_0 = const()[name = string("mean_87_keep_dims_0"), val = bool(true)]; tensor mean_87_cast_fp16 = reduce_mean(axes = mean_87_axes_0, keep_dims = mean_87_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_87_cast_fp16")]; tensor input_191_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_87_cast_fp16)[name = string("input_191_cast_fp16")]; tensor var_6595_axes_0 = const()[name = string("op_6595_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907454720)))]; fp16 var_6583_to_fp16 = const()[name = string("op_6583_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6595_cast_fp16 = layer_norm(axes = var_6595_axes_0, epsilon = var_6583_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_191_cast_fp16)[name = string("op_6595_cast_fp16")]; tensor var_6609 = const()[name = string("op_6609"), val = tensor([0, 2, 1])]; tensor input_193_axes_0 = const()[name = string("input_193_axes_0"), val = tensor([2])]; tensor var_6610 = transpose(perm = var_6609, x = var_6595_cast_fp16)[name = string("transpose_64")]; tensor input_193 = expand_dims(axes = input_193_axes_0, x = var_6610)[name = string("input_193")]; string input_195_pad_type_0 = const()[name = string("input_195_pad_type_0"), val = string("valid")]; tensor input_195_strides_0 = const()[name = string("input_195_strides_0"), val = tensor([1, 1])]; tensor input_195_pad_0 = const()[name = string("input_195_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_195_dilations_0 = const()[name = string("input_195_dilations_0"), val = tensor([1, 1])]; int32 input_195_groups_0 = const()[name = string("input_195_groups_0"), val = int32(1)]; tensor input_195 = conv(dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_193)[name = string("input_195")]; string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_193)[name = string("b_21")]; tensor c_21 = silu(x = input_195)[name = string("c_21")]; tensor input_197 = mul(x = c_21, y = b_21)[name = string("input_197")]; string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; tensor e_21 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_197)[name = string("e_21")]; tensor var_6632_axes_0 = const()[name = string("op_6632_axes_0"), val = tensor([2])]; tensor var_6632 = squeeze(axes = var_6632_axes_0, x = e_21)[name = string("op_6632")]; tensor var_6633 = const()[name = string("op_6633"), val = tensor([0, 2, 1])]; tensor var_6634 = transpose(perm = var_6633, x = var_6632)[name = string("transpose_63")]; tensor hidden_states_67_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = var_6634)[name = string("hidden_states_67_cast_fp16")]; tensor mean_89_axes_0 = const()[name = string("mean_89_axes_0"), val = tensor([-1])]; bool mean_89_keep_dims_0 = const()[name = string("mean_89_keep_dims_0"), val = bool(true)]; tensor mean_89_cast_fp16 = reduce_mean(axes = mean_89_axes_0, keep_dims = mean_89_keep_dims_0, x = hidden_states_67_cast_fp16)[name = string("mean_89_cast_fp16")]; tensor input_199_cast_fp16 = sub(x = hidden_states_67_cast_fp16, y = mean_89_cast_fp16)[name = string("input_199_cast_fp16")]; tensor var_6652_axes_0 = const()[name = string("op_6652_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907459904)))]; fp16 var_6640_to_fp16 = const()[name = string("op_6640_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6652_cast_fp16 = layer_norm(axes = var_6652_axes_0, epsilon = var_6640_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_199_cast_fp16)[name = string("op_6652_cast_fp16")]; tensor var_6664 = const()[name = string("op_6664"), val = tensor([0, 2, 1])]; tensor var_6667_axes_0 = const()[name = string("op_6667_axes_0"), val = tensor([2])]; tensor var_6665 = transpose(perm = var_6664, x = var_6652_cast_fp16)[name = string("transpose_62")]; tensor var_6667 = expand_dims(axes = var_6667_axes_0, x = var_6665)[name = string("op_6667")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor query_states_89 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_6667)[name = string("query_states_89")]; string key_states_111_pad_type_0 = const()[name = string("key_states_111_pad_type_0"), val = string("valid")]; tensor key_states_111_strides_0 = const()[name = string("key_states_111_strides_0"), val = tensor([1, 1])]; tensor key_states_111_pad_0 = const()[name = string("key_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_111_dilations_0 = const()[name = string("key_states_111_dilations_0"), val = tensor([1, 1])]; int32 key_states_111_groups_0 = const()[name = string("key_states_111_groups_0"), val = int32(1)]; tensor key_states_111 = conv(dilations = key_states_111_dilations_0, groups = key_states_111_groups_0, pad = key_states_111_pad_0, pad_type = key_states_111_pad_type_0, strides = key_states_111_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_6667)[name = string("key_states_111")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor value_states_89 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_6667)[name = string("value_states_89")]; tensor var_6709 = const()[name = string("op_6709"), val = tensor([1, 32, 128, 64])]; tensor var_6710 = reshape(shape = var_6709, x = query_states_89)[name = string("op_6710")]; tensor var_6715 = const()[name = string("op_6715"), val = tensor([0, 1, 3, 2])]; tensor var_6720 = const()[name = string("op_6720"), val = tensor([1, 8, 128, 64])]; tensor var_6721 = reshape(shape = var_6720, x = key_states_111)[name = string("op_6721")]; tensor var_6726 = const()[name = string("op_6726"), val = tensor([0, 1, 3, 2])]; tensor var_6731 = const()[name = string("op_6731"), val = tensor([1, 8, 128, 64])]; tensor var_6732 = reshape(shape = var_6731, x = value_states_89)[name = string("op_6732")]; tensor var_6737 = const()[name = string("op_6737"), val = tensor([0, 1, 3, 2])]; tensor mean_91_axes_0 = const()[name = string("mean_91_axes_0"), val = tensor([-1])]; bool mean_91_keep_dims_0 = const()[name = string("mean_91_keep_dims_0"), val = bool(true)]; tensor x_221 = transpose(perm = var_6715, x = var_6710)[name = string("transpose_61")]; tensor mean_91 = reduce_mean(axes = mean_91_axes_0, keep_dims = mean_91_keep_dims_0, x = x_221)[name = string("mean_91")]; tensor input_203 = sub(x = x_221, y = mean_91)[name = string("input_203")]; tensor var_6754_axes_0 = const()[name = string("op_6754_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465088)))]; fp16 var_6742_to_fp16 = const()[name = string("op_6742_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6754_cast_fp16 = layer_norm(axes = var_6754_axes_0, epsilon = var_6742_to_fp16, gamma = model_model_layers_11_self_attn_q_norm_weight_to_fp16, x = input_203)[name = string("op_6754_cast_fp16")]; tensor mean_93_axes_0 = const()[name = string("mean_93_axes_0"), val = tensor([-1])]; bool mean_93_keep_dims_0 = const()[name = string("mean_93_keep_dims_0"), val = bool(true)]; tensor x_223 = transpose(perm = var_6726, x = var_6721)[name = string("transpose_60")]; tensor mean_93 = reduce_mean(axes = mean_93_axes_0, keep_dims = mean_93_keep_dims_0, x = x_223)[name = string("mean_93")]; tensor input_205 = sub(x = x_223, y = mean_93)[name = string("input_205")]; tensor var_6772_axes_0 = const()[name = string("op_6772_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465408)))]; fp16 var_6760_to_fp16 = const()[name = string("op_6760_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6772_cast_fp16 = layer_norm(axes = var_6772_axes_0, epsilon = var_6760_to_fp16, gamma = model_model_layers_11_self_attn_k_norm_weight_to_fp16, x = input_205)[name = string("op_6772_cast_fp16")]; tensor var_6787 = mul(x = var_6754_cast_fp16, y = cos_5)[name = string("op_6787")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_6754_cast_fp16)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_6754_cast_fp16)[name = string("x2_45")]; fp16 const_249_promoted = const()[name = string("const_249_promoted"), val = fp16(-0x1p+0)]; tensor var_6808 = mul(x = x2_45, y = const_249_promoted)[name = string("op_6808")]; int32 var_6810 = const()[name = string("op_6810"), val = int32(-1)]; bool var_6811_interleave_0 = const()[name = string("op_6811_interleave_0"), val = bool(false)]; tensor var_6811 = concat(axis = var_6810, interleave = var_6811_interleave_0, values = (var_6808, x1_45))[name = string("op_6811")]; tensor var_6812 = mul(x = var_6811, y = sin_5)[name = string("op_6812")]; tensor query_states_91 = add(x = var_6787, y = var_6812)[name = string("query_states_91")]; tensor var_6815 = mul(x = var_6772_cast_fp16, y = cos_5)[name = string("op_6815")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_6772_cast_fp16)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_6772_cast_fp16)[name = string("x2_47")]; fp16 const_252_promoted = const()[name = string("const_252_promoted"), val = fp16(-0x1p+0)]; tensor var_6836 = mul(x = x2_47, y = const_252_promoted)[name = string("op_6836")]; int32 var_6838 = const()[name = string("op_6838"), val = int32(-1)]; bool var_6839_interleave_0 = const()[name = string("op_6839_interleave_0"), val = bool(false)]; tensor var_6839 = concat(axis = var_6838, interleave = var_6839_interleave_0, values = (var_6836, x1_47))[name = string("op_6839")]; tensor var_6840 = mul(x = var_6839, y = sin_5)[name = string("op_6840")]; tensor key_states_113 = add(x = var_6815, y = var_6840)[name = string("key_states_113")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([11])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([12])]; int32 concat_200_axis_0 = const()[name = string("concat_200_axis_0"), val = int32(0)]; bool concat_200_interleave_0 = const()[name = string("concat_200_interleave_0"), val = bool(false)]; tensor concat_200 = concat(axis = concat_200_axis_0, interleave = concat_200_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_200")]; tensor concat_201_values1_0 = const()[name = string("concat_201_values1_0"), val = tensor([0])]; tensor concat_201_values3_0 = const()[name = string("concat_201_values3_0"), val = tensor([0])]; int32 concat_201_axis_0 = const()[name = string("concat_201_axis_0"), val = int32(0)]; bool concat_201_interleave_0 = const()[name = string("concat_201_interleave_0"), val = bool(false)]; tensor concat_201 = concat(axis = concat_201_axis_0, interleave = concat_201_interleave_0, values = (expand_dims_136, concat_201_values1_0, var_1230, concat_201_values3_0))[name = string("concat_201")]; tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_200, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_201, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = key_states_113, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_58")]; tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([47])]; tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([48])]; int32 concat_204_axis_0 = const()[name = string("concat_204_axis_0"), val = int32(0)]; bool concat_204_interleave_0 = const()[name = string("concat_204_interleave_0"), val = bool(false)]; tensor concat_204 = concat(axis = concat_204_axis_0, interleave = concat_204_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_204")]; tensor concat_205_values1_0 = const()[name = string("concat_205_values1_0"), val = tensor([0])]; tensor concat_205_values3_0 = const()[name = string("concat_205_values3_0"), val = tensor([0])]; int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (expand_dims_142, concat_205_values1_0, var_1230, concat_205_values3_0))[name = string("concat_205")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91 = transpose(perm = var_6737, x = var_6732)[name = string("transpose_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_204, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_205, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = value_states_91, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_59")]; tensor var_6911_begin_0 = const()[name = string("op_6911_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_6911_end_0 = const()[name = string("op_6911_end_0"), val = tensor([12, 8, 1024, 128])]; tensor var_6911_end_mask_0 = const()[name = string("op_6911_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6911_cast_fp16 = slice_by_index(begin = var_6911_begin_0, end = var_6911_end_0, end_mask = var_6911_end_mask_0, x = coreml_update_state_59)[name = string("op_6911_cast_fp16")]; tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_6911_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; tensor var_6918_begin_0 = const()[name = string("op_6918_begin_0"), val = tensor([47, 0, 0, 0])]; tensor var_6918_end_0 = const()[name = string("op_6918_end_0"), val = tensor([48, 8, 1024, 128])]; tensor var_6918_end_mask_0 = const()[name = string("op_6918_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6918_cast_fp16 = slice_by_index(begin = var_6918_begin_0, end = var_6918_end_0, end_mask = var_6918_end_mask_0, x = coreml_update_state_59)[name = string("op_6918_cast_fp16")]; tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_6918_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; tensor x_227_axes_0 = const()[name = string("x_227_axes_0"), val = tensor([1])]; tensor x_227_cast_fp16 = expand_dims(axes = x_227_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_227_cast_fp16")]; tensor var_6947 = const()[name = string("op_6947"), val = tensor([1, 4, 1, 1])]; tensor x_229_cast_fp16 = tile(reps = var_6947, x = x_227_cast_fp16)[name = string("x_229_cast_fp16")]; tensor var_6959 = const()[name = string("op_6959"), val = tensor([1, -1, 1024, 128])]; tensor key_states_117_cast_fp16 = reshape(shape = var_6959, x = x_229_cast_fp16)[name = string("key_states_117_cast_fp16")]; tensor x_233_axes_0 = const()[name = string("x_233_axes_0"), val = tensor([1])]; tensor x_233_cast_fp16 = expand_dims(axes = x_233_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_233_cast_fp16")]; tensor var_6967 = const()[name = string("op_6967"), val = tensor([1, 4, 1, 1])]; tensor x_235_cast_fp16 = tile(reps = var_6967, x = x_233_cast_fp16)[name = string("x_235_cast_fp16")]; bool var_6994_transpose_x_0 = const()[name = string("op_6994_transpose_x_0"), val = bool(false)]; bool var_6994_transpose_y_0 = const()[name = string("op_6994_transpose_y_0"), val = bool(true)]; tensor var_6994 = matmul(transpose_x = var_6994_transpose_x_0, transpose_y = var_6994_transpose_y_0, x = query_states_91, y = key_states_117_cast_fp16)[name = string("op_6994")]; fp16 var_6995_to_fp16 = const()[name = string("op_6995_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_45_cast_fp16 = mul(x = var_6994, y = var_6995_to_fp16)[name = string("attn_weights_45_cast_fp16")]; tensor attn_weights_47_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("attn_weights_47_cast_fp16")]; int32 var_7030 = const()[name = string("op_7030"), val = int32(-1)]; tensor var_7032_cast_fp16 = softmax(axis = var_7030, x = attn_weights_47_cast_fp16)[name = string("op_7032_cast_fp16")]; tensor concat_210 = const()[name = string("concat_210"), val = tensor([32, 64, 1024])]; tensor reshape_33_cast_fp16 = reshape(shape = concat_210, x = var_7032_cast_fp16)[name = string("reshape_33_cast_fp16")]; tensor concat_211 = const()[name = string("concat_211"), val = tensor([32, 1024, 128])]; tensor reshape_34_cast_fp16 = reshape(shape = concat_211, x = x_235_cast_fp16)[name = string("reshape_34_cast_fp16")]; bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; tensor concat_215 = const()[name = string("concat_215"), val = tensor([1, 32, 64, 128])]; tensor reshape_35_cast_fp16 = reshape(shape = concat_215, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; tensor var_7044_perm_0 = const()[name = string("op_7044_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7063 = const()[name = string("op_7063"), val = tensor([1, 64, 4096])]; tensor var_7044_cast_fp16 = transpose(perm = var_7044_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_58")]; tensor attn_output_115_cast_fp16 = reshape(shape = var_7063, x = var_7044_cast_fp16)[name = string("attn_output_115_cast_fp16")]; tensor var_7068 = const()[name = string("op_7068"), val = tensor([0, 2, 1])]; string var_7084_pad_type_0 = const()[name = string("op_7084_pad_type_0"), val = string("valid")]; int32 var_7084_groups_0 = const()[name = string("op_7084_groups_0"), val = int32(1)]; tensor var_7084_strides_0 = const()[name = string("op_7084_strides_0"), val = tensor([1])]; tensor var_7084_pad_0 = const()[name = string("op_7084_pad_0"), val = tensor([0, 0])]; tensor var_7084_dilations_0 = const()[name = string("op_7084_dilations_0"), val = tensor([1])]; tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912708672))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7069_cast_fp16 = transpose(perm = var_7068, x = attn_output_115_cast_fp16)[name = string("transpose_57")]; tensor var_7084_cast_fp16 = conv(dilations = var_7084_dilations_0, groups = var_7084_groups_0, pad = var_7084_pad_0, pad_type = var_7084_pad_type_0, strides = var_7084_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7069_cast_fp16)[name = string("op_7084_cast_fp16")]; tensor var_7088 = const()[name = string("op_7088"), val = tensor([0, 2, 1])]; tensor attn_output_119_cast_fp16 = transpose(perm = var_7088, x = var_7084_cast_fp16)[name = string("transpose_56")]; tensor hidden_states_71_cast_fp16 = add(x = hidden_states_67_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor mean_95_axes_0 = const()[name = string("mean_95_axes_0"), val = tensor([-1])]; bool mean_95_keep_dims_0 = const()[name = string("mean_95_keep_dims_0"), val = bool(true)]; tensor mean_95_cast_fp16 = reduce_mean(axes = mean_95_axes_0, keep_dims = mean_95_keep_dims_0, x = hidden_states_71_cast_fp16)[name = string("mean_95_cast_fp16")]; tensor input_209_cast_fp16 = sub(x = hidden_states_71_cast_fp16, y = mean_95_cast_fp16)[name = string("input_209_cast_fp16")]; tensor var_7107_axes_0 = const()[name = string("op_7107_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912718976)))]; fp16 var_7095_to_fp16 = const()[name = string("op_7095_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7107_cast_fp16 = layer_norm(axes = var_7107_axes_0, epsilon = var_7095_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_209_cast_fp16)[name = string("op_7107_cast_fp16")]; tensor var_7121 = const()[name = string("op_7121"), val = tensor([0, 2, 1])]; tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; tensor var_7122 = transpose(perm = var_7121, x = var_7107_cast_fp16)[name = string("transpose_55")]; tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_7122)[name = string("input_211")]; string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_211)[name = string("input_213")]; string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_211)[name = string("b_23")]; tensor c_23 = silu(x = input_213)[name = string("c_23")]; tensor input_215 = mul(x = c_23, y = b_23)[name = string("input_215")]; string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; tensor e_23 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_215)[name = string("e_23")]; tensor var_7144_axes_0 = const()[name = string("op_7144_axes_0"), val = tensor([2])]; tensor var_7144 = squeeze(axes = var_7144_axes_0, x = e_23)[name = string("op_7144")]; tensor var_7145 = const()[name = string("op_7145"), val = tensor([0, 2, 1])]; tensor var_7146 = transpose(perm = var_7145, x = var_7144)[name = string("transpose_54")]; tensor hidden_states_73_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = var_7146)[name = string("hidden_states_73_cast_fp16")]; tensor mean_97_axes_0 = const()[name = string("mean_97_axes_0"), val = tensor([-1])]; bool mean_97_keep_dims_0 = const()[name = string("mean_97_keep_dims_0"), val = bool(true)]; tensor mean_97_cast_fp16 = reduce_mean(axes = mean_97_axes_0, keep_dims = mean_97_keep_dims_0, x = hidden_states_73_cast_fp16)[name = string("mean_97_cast_fp16")]; tensor input_217_cast_fp16 = sub(x = hidden_states_73_cast_fp16, y = mean_97_cast_fp16)[name = string("input_217_cast_fp16")]; tensor var_7164_axes_0 = const()[name = string("op_7164_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912724160)))]; fp16 var_7152_to_fp16 = const()[name = string("op_7152_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7164_cast_fp16 = layer_norm(axes = var_7164_axes_0, epsilon = var_7152_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_217_cast_fp16)[name = string("op_7164_cast_fp16")]; tensor var_7176 = const()[name = string("op_7176"), val = tensor([0, 2, 1])]; tensor var_7179_axes_0 = const()[name = string("op_7179_axes_0"), val = tensor([2])]; tensor var_7177 = transpose(perm = var_7176, x = var_7164_cast_fp16)[name = string("transpose_53")]; tensor var_7179 = expand_dims(axes = var_7179_axes_0, x = var_7177)[name = string("op_7179")]; string query_states_97_pad_type_0 = const()[name = string("query_states_97_pad_type_0"), val = string("valid")]; tensor query_states_97_strides_0 = const()[name = string("query_states_97_strides_0"), val = tensor([1, 1])]; tensor query_states_97_pad_0 = const()[name = string("query_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_97_dilations_0 = const()[name = string("query_states_97_dilations_0"), val = tensor([1, 1])]; int32 query_states_97_groups_0 = const()[name = string("query_states_97_groups_0"), val = int32(1)]; tensor query_states_97 = conv(dilations = query_states_97_dilations_0, groups = query_states_97_groups_0, pad = query_states_97_pad_0, pad_type = query_states_97_pad_type_0, strides = query_states_97_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_7179)[name = string("query_states_97")]; string key_states_121_pad_type_0 = const()[name = string("key_states_121_pad_type_0"), val = string("valid")]; tensor key_states_121_strides_0 = const()[name = string("key_states_121_strides_0"), val = tensor([1, 1])]; tensor key_states_121_pad_0 = const()[name = string("key_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_121_dilations_0 = const()[name = string("key_states_121_dilations_0"), val = tensor([1, 1])]; int32 key_states_121_groups_0 = const()[name = string("key_states_121_groups_0"), val = int32(1)]; tensor key_states_121 = conv(dilations = key_states_121_dilations_0, groups = key_states_121_groups_0, pad = key_states_121_pad_0, pad_type = key_states_121_pad_type_0, strides = key_states_121_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_7179)[name = string("key_states_121")]; string value_states_97_pad_type_0 = const()[name = string("value_states_97_pad_type_0"), val = string("valid")]; tensor value_states_97_strides_0 = const()[name = string("value_states_97_strides_0"), val = tensor([1, 1])]; tensor value_states_97_pad_0 = const()[name = string("value_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_97_dilations_0 = const()[name = string("value_states_97_dilations_0"), val = tensor([1, 1])]; int32 value_states_97_groups_0 = const()[name = string("value_states_97_groups_0"), val = int32(1)]; tensor value_states_97 = conv(dilations = value_states_97_dilations_0, groups = value_states_97_groups_0, pad = value_states_97_pad_0, pad_type = value_states_97_pad_type_0, strides = value_states_97_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_7179)[name = string("value_states_97")]; tensor var_7221 = const()[name = string("op_7221"), val = tensor([1, 32, 128, 64])]; tensor var_7222 = reshape(shape = var_7221, x = query_states_97)[name = string("op_7222")]; tensor var_7227 = const()[name = string("op_7227"), val = tensor([0, 1, 3, 2])]; tensor var_7232 = const()[name = string("op_7232"), val = tensor([1, 8, 128, 64])]; tensor var_7233 = reshape(shape = var_7232, x = key_states_121)[name = string("op_7233")]; tensor var_7238 = const()[name = string("op_7238"), val = tensor([0, 1, 3, 2])]; tensor var_7243 = const()[name = string("op_7243"), val = tensor([1, 8, 128, 64])]; tensor var_7244 = reshape(shape = var_7243, x = value_states_97)[name = string("op_7244")]; tensor var_7249 = const()[name = string("op_7249"), val = tensor([0, 1, 3, 2])]; tensor mean_99_axes_0 = const()[name = string("mean_99_axes_0"), val = tensor([-1])]; bool mean_99_keep_dims_0 = const()[name = string("mean_99_keep_dims_0"), val = bool(true)]; tensor x_241 = transpose(perm = var_7227, x = var_7222)[name = string("transpose_52")]; tensor mean_99 = reduce_mean(axes = mean_99_axes_0, keep_dims = mean_99_keep_dims_0, x = x_241)[name = string("mean_99")]; tensor input_221 = sub(x = x_241, y = mean_99)[name = string("input_221")]; tensor var_7266_axes_0 = const()[name = string("op_7266_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729344)))]; fp16 var_7254_to_fp16 = const()[name = string("op_7254_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7266_cast_fp16 = layer_norm(axes = var_7266_axes_0, epsilon = var_7254_to_fp16, gamma = model_model_layers_12_self_attn_q_norm_weight_to_fp16, x = input_221)[name = string("op_7266_cast_fp16")]; tensor mean_101_axes_0 = const()[name = string("mean_101_axes_0"), val = tensor([-1])]; bool mean_101_keep_dims_0 = const()[name = string("mean_101_keep_dims_0"), val = bool(true)]; tensor x_243 = transpose(perm = var_7238, x = var_7233)[name = string("transpose_51")]; tensor mean_101 = reduce_mean(axes = mean_101_axes_0, keep_dims = mean_101_keep_dims_0, x = x_243)[name = string("mean_101")]; tensor input_223 = sub(x = x_243, y = mean_101)[name = string("input_223")]; tensor var_7284_axes_0 = const()[name = string("op_7284_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729664)))]; fp16 var_7272_to_fp16 = const()[name = string("op_7272_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7284_cast_fp16 = layer_norm(axes = var_7284_axes_0, epsilon = var_7272_to_fp16, gamma = model_model_layers_12_self_attn_k_norm_weight_to_fp16, x = input_223)[name = string("op_7284_cast_fp16")]; tensor var_7299 = mul(x = var_7266_cast_fp16, y = cos_5)[name = string("op_7299")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_7266_cast_fp16)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_7266_cast_fp16)[name = string("x2_49")]; fp16 const_271_promoted = const()[name = string("const_271_promoted"), val = fp16(-0x1p+0)]; tensor var_7320 = mul(x = x2_49, y = const_271_promoted)[name = string("op_7320")]; int32 var_7322 = const()[name = string("op_7322"), val = int32(-1)]; bool var_7323_interleave_0 = const()[name = string("op_7323_interleave_0"), val = bool(false)]; tensor var_7323 = concat(axis = var_7322, interleave = var_7323_interleave_0, values = (var_7320, x1_49))[name = string("op_7323")]; tensor var_7324 = mul(x = var_7323, y = sin_5)[name = string("op_7324")]; tensor query_states_99 = add(x = var_7299, y = var_7324)[name = string("query_states_99")]; tensor var_7327 = mul(x = var_7284_cast_fp16, y = cos_5)[name = string("op_7327")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_7284_cast_fp16)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_7284_cast_fp16)[name = string("x2_51")]; fp16 const_274_promoted = const()[name = string("const_274_promoted"), val = fp16(-0x1p+0)]; tensor var_7348 = mul(x = x2_51, y = const_274_promoted)[name = string("op_7348")]; int32 var_7350 = const()[name = string("op_7350"), val = int32(-1)]; bool var_7351_interleave_0 = const()[name = string("op_7351_interleave_0"), val = bool(false)]; tensor var_7351 = concat(axis = var_7350, interleave = var_7351_interleave_0, values = (var_7348, x1_51))[name = string("op_7351")]; tensor var_7352 = mul(x = var_7351, y = sin_5)[name = string("op_7352")]; tensor key_states_123 = add(x = var_7327, y = var_7352)[name = string("key_states_123")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([12])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([13])]; int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_218")]; tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_148, concat_219_values1_0, var_1230, concat_219_values3_0))[name = string("concat_219")]; tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = key_states_123, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_60")]; tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([48])]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([49])]; int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_222")]; tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_154, concat_223_values1_0, var_1230, concat_223_values3_0))[name = string("concat_223")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_99 = transpose(perm = var_7249, x = var_7244)[name = string("transpose_50")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = value_states_99, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_61")]; tensor var_7423_begin_0 = const()[name = string("op_7423_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_7423_end_0 = const()[name = string("op_7423_end_0"), val = tensor([13, 8, 1024, 128])]; tensor var_7423_end_mask_0 = const()[name = string("op_7423_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7423_cast_fp16 = slice_by_index(begin = var_7423_begin_0, end = var_7423_end_0, end_mask = var_7423_end_mask_0, x = coreml_update_state_61)[name = string("op_7423_cast_fp16")]; tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_7423_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; tensor var_7430_begin_0 = const()[name = string("op_7430_begin_0"), val = tensor([48, 0, 0, 0])]; tensor var_7430_end_0 = const()[name = string("op_7430_end_0"), val = tensor([49, 8, 1024, 128])]; tensor var_7430_end_mask_0 = const()[name = string("op_7430_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7430_cast_fp16 = slice_by_index(begin = var_7430_begin_0, end = var_7430_end_0, end_mask = var_7430_end_mask_0, x = coreml_update_state_61)[name = string("op_7430_cast_fp16")]; tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_7430_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; tensor x_247_axes_0 = const()[name = string("x_247_axes_0"), val = tensor([1])]; tensor x_247_cast_fp16 = expand_dims(axes = x_247_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_247_cast_fp16")]; tensor var_7459 = const()[name = string("op_7459"), val = tensor([1, 4, 1, 1])]; tensor x_249_cast_fp16 = tile(reps = var_7459, x = x_247_cast_fp16)[name = string("x_249_cast_fp16")]; tensor var_7471 = const()[name = string("op_7471"), val = tensor([1, -1, 1024, 128])]; tensor key_states_127_cast_fp16 = reshape(shape = var_7471, x = x_249_cast_fp16)[name = string("key_states_127_cast_fp16")]; tensor x_253_axes_0 = const()[name = string("x_253_axes_0"), val = tensor([1])]; tensor x_253_cast_fp16 = expand_dims(axes = x_253_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_253_cast_fp16")]; tensor var_7479 = const()[name = string("op_7479"), val = tensor([1, 4, 1, 1])]; tensor x_255_cast_fp16 = tile(reps = var_7479, x = x_253_cast_fp16)[name = string("x_255_cast_fp16")]; bool var_7506_transpose_x_0 = const()[name = string("op_7506_transpose_x_0"), val = bool(false)]; bool var_7506_transpose_y_0 = const()[name = string("op_7506_transpose_y_0"), val = bool(true)]; tensor var_7506 = matmul(transpose_x = var_7506_transpose_x_0, transpose_y = var_7506_transpose_y_0, x = query_states_99, y = key_states_127_cast_fp16)[name = string("op_7506")]; fp16 var_7507_to_fp16 = const()[name = string("op_7507_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_7506, y = var_7507_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_7542 = const()[name = string("op_7542"), val = int32(-1)]; tensor var_7544_cast_fp16 = softmax(axis = var_7542, x = attn_weights_51_cast_fp16)[name = string("op_7544_cast_fp16")]; tensor concat_228 = const()[name = string("concat_228"), val = tensor([32, 64, 1024])]; tensor reshape_36_cast_fp16 = reshape(shape = concat_228, x = var_7544_cast_fp16)[name = string("reshape_36_cast_fp16")]; tensor concat_229 = const()[name = string("concat_229"), val = tensor([32, 1024, 128])]; tensor reshape_37_cast_fp16 = reshape(shape = concat_229, x = x_255_cast_fp16)[name = string("reshape_37_cast_fp16")]; bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; tensor concat_233 = const()[name = string("concat_233"), val = tensor([1, 32, 64, 128])]; tensor reshape_38_cast_fp16 = reshape(shape = concat_233, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; tensor var_7556_perm_0 = const()[name = string("op_7556_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7575 = const()[name = string("op_7575"), val = tensor([1, 64, 4096])]; tensor var_7556_cast_fp16 = transpose(perm = var_7556_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_49")]; tensor attn_output_125_cast_fp16 = reshape(shape = var_7575, x = var_7556_cast_fp16)[name = string("attn_output_125_cast_fp16")]; tensor var_7580 = const()[name = string("op_7580"), val = tensor([0, 2, 1])]; string var_7596_pad_type_0 = const()[name = string("op_7596_pad_type_0"), val = string("valid")]; int32 var_7596_groups_0 = const()[name = string("op_7596_groups_0"), val = int32(1)]; tensor var_7596_strides_0 = const()[name = string("op_7596_strides_0"), val = tensor([1])]; tensor var_7596_pad_0 = const()[name = string("op_7596_pad_0"), val = tensor([0, 0])]; tensor var_7596_dilations_0 = const()[name = string("op_7596_dilations_0"), val = tensor([1])]; tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917972928))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7581_cast_fp16 = transpose(perm = var_7580, x = attn_output_125_cast_fp16)[name = string("transpose_48")]; tensor var_7596_cast_fp16 = conv(dilations = var_7596_dilations_0, groups = var_7596_groups_0, pad = var_7596_pad_0, pad_type = var_7596_pad_type_0, strides = var_7596_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_7581_cast_fp16)[name = string("op_7596_cast_fp16")]; tensor var_7600 = const()[name = string("op_7600"), val = tensor([0, 2, 1])]; tensor attn_output_129_cast_fp16 = transpose(perm = var_7600, x = var_7596_cast_fp16)[name = string("transpose_47")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor mean_103_axes_0 = const()[name = string("mean_103_axes_0"), val = tensor([-1])]; bool mean_103_keep_dims_0 = const()[name = string("mean_103_keep_dims_0"), val = bool(true)]; tensor mean_103_cast_fp16 = reduce_mean(axes = mean_103_axes_0, keep_dims = mean_103_keep_dims_0, x = hidden_states_77_cast_fp16)[name = string("mean_103_cast_fp16")]; tensor input_227_cast_fp16 = sub(x = hidden_states_77_cast_fp16, y = mean_103_cast_fp16)[name = string("input_227_cast_fp16")]; tensor var_7619_axes_0 = const()[name = string("op_7619_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917983232)))]; fp16 var_7607_to_fp16 = const()[name = string("op_7607_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7619_cast_fp16 = layer_norm(axes = var_7619_axes_0, epsilon = var_7607_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_227_cast_fp16)[name = string("op_7619_cast_fp16")]; tensor var_7633 = const()[name = string("op_7633"), val = tensor([0, 2, 1])]; tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; tensor var_7634 = transpose(perm = var_7633, x = var_7619_cast_fp16)[name = string("transpose_46")]; tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_7634)[name = string("input_229")]; string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_229)[name = string("input_231")]; string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_229)[name = string("b_25")]; tensor c_25 = silu(x = input_231)[name = string("c_25")]; tensor input_233 = mul(x = c_25, y = b_25)[name = string("input_233")]; string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; tensor e_25 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_233)[name = string("e_25")]; tensor var_7656_axes_0 = const()[name = string("op_7656_axes_0"), val = tensor([2])]; tensor var_7656 = squeeze(axes = var_7656_axes_0, x = e_25)[name = string("op_7656")]; tensor var_7657 = const()[name = string("op_7657"), val = tensor([0, 2, 1])]; tensor var_7658 = transpose(perm = var_7657, x = var_7656)[name = string("transpose_45")]; tensor hidden_states_79_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = var_7658)[name = string("hidden_states_79_cast_fp16")]; tensor mean_105_axes_0 = const()[name = string("mean_105_axes_0"), val = tensor([-1])]; bool mean_105_keep_dims_0 = const()[name = string("mean_105_keep_dims_0"), val = bool(true)]; tensor mean_105_cast_fp16 = reduce_mean(axes = mean_105_axes_0, keep_dims = mean_105_keep_dims_0, x = hidden_states_79_cast_fp16)[name = string("mean_105_cast_fp16")]; tensor input_235_cast_fp16 = sub(x = hidden_states_79_cast_fp16, y = mean_105_cast_fp16)[name = string("input_235_cast_fp16")]; tensor var_7676_axes_0 = const()[name = string("op_7676_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917988416)))]; fp16 var_7664_to_fp16 = const()[name = string("op_7664_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7676_cast_fp16 = layer_norm(axes = var_7676_axes_0, epsilon = var_7664_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_235_cast_fp16)[name = string("op_7676_cast_fp16")]; tensor var_7688 = const()[name = string("op_7688"), val = tensor([0, 2, 1])]; tensor var_7691_axes_0 = const()[name = string("op_7691_axes_0"), val = tensor([2])]; tensor var_7689 = transpose(perm = var_7688, x = var_7676_cast_fp16)[name = string("transpose_44")]; tensor var_7691 = expand_dims(axes = var_7691_axes_0, x = var_7689)[name = string("op_7691")]; string query_states_105_pad_type_0 = const()[name = string("query_states_105_pad_type_0"), val = string("valid")]; tensor query_states_105_strides_0 = const()[name = string("query_states_105_strides_0"), val = tensor([1, 1])]; tensor query_states_105_pad_0 = const()[name = string("query_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_105_dilations_0 = const()[name = string("query_states_105_dilations_0"), val = tensor([1, 1])]; int32 query_states_105_groups_0 = const()[name = string("query_states_105_groups_0"), val = int32(1)]; tensor query_states_105 = conv(dilations = query_states_105_dilations_0, groups = query_states_105_groups_0, pad = query_states_105_pad_0, pad_type = query_states_105_pad_type_0, strides = query_states_105_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_7691)[name = string("query_states_105")]; string key_states_131_pad_type_0 = const()[name = string("key_states_131_pad_type_0"), val = string("valid")]; tensor key_states_131_strides_0 = const()[name = string("key_states_131_strides_0"), val = tensor([1, 1])]; tensor key_states_131_pad_0 = const()[name = string("key_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_131_dilations_0 = const()[name = string("key_states_131_dilations_0"), val = tensor([1, 1])]; int32 key_states_131_groups_0 = const()[name = string("key_states_131_groups_0"), val = int32(1)]; tensor key_states_131 = conv(dilations = key_states_131_dilations_0, groups = key_states_131_groups_0, pad = key_states_131_pad_0, pad_type = key_states_131_pad_type_0, strides = key_states_131_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_7691)[name = string("key_states_131")]; string value_states_105_pad_type_0 = const()[name = string("value_states_105_pad_type_0"), val = string("valid")]; tensor value_states_105_strides_0 = const()[name = string("value_states_105_strides_0"), val = tensor([1, 1])]; tensor value_states_105_pad_0 = const()[name = string("value_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_105_dilations_0 = const()[name = string("value_states_105_dilations_0"), val = tensor([1, 1])]; int32 value_states_105_groups_0 = const()[name = string("value_states_105_groups_0"), val = int32(1)]; tensor value_states_105 = conv(dilations = value_states_105_dilations_0, groups = value_states_105_groups_0, pad = value_states_105_pad_0, pad_type = value_states_105_pad_type_0, strides = value_states_105_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_7691)[name = string("value_states_105")]; tensor var_7733 = const()[name = string("op_7733"), val = tensor([1, 32, 128, 64])]; tensor var_7734 = reshape(shape = var_7733, x = query_states_105)[name = string("op_7734")]; tensor var_7739 = const()[name = string("op_7739"), val = tensor([0, 1, 3, 2])]; tensor var_7744 = const()[name = string("op_7744"), val = tensor([1, 8, 128, 64])]; tensor var_7745 = reshape(shape = var_7744, x = key_states_131)[name = string("op_7745")]; tensor var_7750 = const()[name = string("op_7750"), val = tensor([0, 1, 3, 2])]; tensor var_7755 = const()[name = string("op_7755"), val = tensor([1, 8, 128, 64])]; tensor var_7756 = reshape(shape = var_7755, x = value_states_105)[name = string("op_7756")]; tensor var_7761 = const()[name = string("op_7761"), val = tensor([0, 1, 3, 2])]; tensor mean_107_axes_0 = const()[name = string("mean_107_axes_0"), val = tensor([-1])]; bool mean_107_keep_dims_0 = const()[name = string("mean_107_keep_dims_0"), val = bool(true)]; tensor x_261 = transpose(perm = var_7739, x = var_7734)[name = string("transpose_43")]; tensor mean_107 = reduce_mean(axes = mean_107_axes_0, keep_dims = mean_107_keep_dims_0, x = x_261)[name = string("mean_107")]; tensor input_239 = sub(x = x_261, y = mean_107)[name = string("input_239")]; tensor var_7778_axes_0 = const()[name = string("op_7778_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917993600)))]; fp16 var_7766_to_fp16 = const()[name = string("op_7766_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7778_cast_fp16 = layer_norm(axes = var_7778_axes_0, epsilon = var_7766_to_fp16, gamma = model_model_layers_13_self_attn_q_norm_weight_to_fp16, x = input_239)[name = string("op_7778_cast_fp16")]; tensor mean_109_axes_0 = const()[name = string("mean_109_axes_0"), val = tensor([-1])]; bool mean_109_keep_dims_0 = const()[name = string("mean_109_keep_dims_0"), val = bool(true)]; tensor x_263 = transpose(perm = var_7750, x = var_7745)[name = string("transpose_42")]; tensor mean_109 = reduce_mean(axes = mean_109_axes_0, keep_dims = mean_109_keep_dims_0, x = x_263)[name = string("mean_109")]; tensor input_241 = sub(x = x_263, y = mean_109)[name = string("input_241")]; tensor var_7796_axes_0 = const()[name = string("op_7796_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917993920)))]; fp16 var_7784_to_fp16 = const()[name = string("op_7784_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7796_cast_fp16 = layer_norm(axes = var_7796_axes_0, epsilon = var_7784_to_fp16, gamma = model_model_layers_13_self_attn_k_norm_weight_to_fp16, x = input_241)[name = string("op_7796_cast_fp16")]; tensor var_7811 = mul(x = var_7778_cast_fp16, y = cos_5)[name = string("op_7811")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_7778_cast_fp16)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_7778_cast_fp16)[name = string("x2_53")]; fp16 const_293_promoted = const()[name = string("const_293_promoted"), val = fp16(-0x1p+0)]; tensor var_7832 = mul(x = x2_53, y = const_293_promoted)[name = string("op_7832")]; int32 var_7834 = const()[name = string("op_7834"), val = int32(-1)]; bool var_7835_interleave_0 = const()[name = string("op_7835_interleave_0"), val = bool(false)]; tensor var_7835 = concat(axis = var_7834, interleave = var_7835_interleave_0, values = (var_7832, x1_53))[name = string("op_7835")]; tensor var_7836 = mul(x = var_7835, y = sin_5)[name = string("op_7836")]; tensor query_states_107 = add(x = var_7811, y = var_7836)[name = string("query_states_107")]; tensor var_7839 = mul(x = var_7796_cast_fp16, y = cos_5)[name = string("op_7839")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_7796_cast_fp16)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_7796_cast_fp16)[name = string("x2_55")]; fp16 const_296_promoted = const()[name = string("const_296_promoted"), val = fp16(-0x1p+0)]; tensor var_7860 = mul(x = x2_55, y = const_296_promoted)[name = string("op_7860")]; int32 var_7862 = const()[name = string("op_7862"), val = int32(-1)]; bool var_7863_interleave_0 = const()[name = string("op_7863_interleave_0"), val = bool(false)]; tensor var_7863 = concat(axis = var_7862, interleave = var_7863_interleave_0, values = (var_7860, x1_55))[name = string("op_7863")]; tensor var_7864 = mul(x = var_7863, y = sin_5)[name = string("op_7864")]; tensor key_states_133 = add(x = var_7839, y = var_7864)[name = string("key_states_133")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([13])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([14])]; int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_236")]; tensor concat_237_values1_0 = const()[name = string("concat_237_values1_0"), val = tensor([0])]; tensor concat_237_values3_0 = const()[name = string("concat_237_values3_0"), val = tensor([0])]; int32 concat_237_axis_0 = const()[name = string("concat_237_axis_0"), val = int32(0)]; bool concat_237_interleave_0 = const()[name = string("concat_237_interleave_0"), val = bool(false)]; tensor concat_237 = concat(axis = concat_237_axis_0, interleave = concat_237_interleave_0, values = (expand_dims_160, concat_237_values1_0, var_1230, concat_237_values3_0))[name = string("concat_237")]; tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_236, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_237, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = key_states_133, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_62")]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([49])]; tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([50])]; int32 concat_240_axis_0 = const()[name = string("concat_240_axis_0"), val = int32(0)]; bool concat_240_interleave_0 = const()[name = string("concat_240_interleave_0"), val = bool(false)]; tensor concat_240 = concat(axis = concat_240_axis_0, interleave = concat_240_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_240")]; tensor concat_241_values1_0 = const()[name = string("concat_241_values1_0"), val = tensor([0])]; tensor concat_241_values3_0 = const()[name = string("concat_241_values3_0"), val = tensor([0])]; int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (expand_dims_166, concat_241_values1_0, var_1230, concat_241_values3_0))[name = string("concat_241")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_107 = transpose(perm = var_7761, x = var_7756)[name = string("transpose_41")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_240, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_241, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = value_states_107, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_63")]; tensor var_7935_begin_0 = const()[name = string("op_7935_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_7935_end_0 = const()[name = string("op_7935_end_0"), val = tensor([14, 8, 1024, 128])]; tensor var_7935_end_mask_0 = const()[name = string("op_7935_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7935_cast_fp16 = slice_by_index(begin = var_7935_begin_0, end = var_7935_end_0, end_mask = var_7935_end_mask_0, x = coreml_update_state_63)[name = string("op_7935_cast_fp16")]; tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_7935_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; tensor var_7942_begin_0 = const()[name = string("op_7942_begin_0"), val = tensor([49, 0, 0, 0])]; tensor var_7942_end_0 = const()[name = string("op_7942_end_0"), val = tensor([50, 8, 1024, 128])]; tensor var_7942_end_mask_0 = const()[name = string("op_7942_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7942_cast_fp16 = slice_by_index(begin = var_7942_begin_0, end = var_7942_end_0, end_mask = var_7942_end_mask_0, x = coreml_update_state_63)[name = string("op_7942_cast_fp16")]; tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_7942_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; tensor x_267_axes_0 = const()[name = string("x_267_axes_0"), val = tensor([1])]; tensor x_267_cast_fp16 = expand_dims(axes = x_267_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_267_cast_fp16")]; tensor var_7971 = const()[name = string("op_7971"), val = tensor([1, 4, 1, 1])]; tensor x_269_cast_fp16 = tile(reps = var_7971, x = x_267_cast_fp16)[name = string("x_269_cast_fp16")]; tensor var_7983 = const()[name = string("op_7983"), val = tensor([1, -1, 1024, 128])]; tensor key_states_137_cast_fp16 = reshape(shape = var_7983, x = x_269_cast_fp16)[name = string("key_states_137_cast_fp16")]; tensor x_273_axes_0 = const()[name = string("x_273_axes_0"), val = tensor([1])]; tensor x_273_cast_fp16 = expand_dims(axes = x_273_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_273_cast_fp16")]; tensor var_7991 = const()[name = string("op_7991"), val = tensor([1, 4, 1, 1])]; tensor x_275_cast_fp16 = tile(reps = var_7991, x = x_273_cast_fp16)[name = string("x_275_cast_fp16")]; bool var_8018_transpose_x_0 = const()[name = string("op_8018_transpose_x_0"), val = bool(false)]; bool var_8018_transpose_y_0 = const()[name = string("op_8018_transpose_y_0"), val = bool(true)]; tensor var_8018 = matmul(transpose_x = var_8018_transpose_x_0, transpose_y = var_8018_transpose_y_0, x = query_states_107, y = key_states_137_cast_fp16)[name = string("op_8018")]; fp16 var_8019_to_fp16 = const()[name = string("op_8019_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_53_cast_fp16 = mul(x = var_8018, y = var_8019_to_fp16)[name = string("attn_weights_53_cast_fp16")]; tensor attn_weights_55_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = causal_mask)[name = string("attn_weights_55_cast_fp16")]; int32 var_8054 = const()[name = string("op_8054"), val = int32(-1)]; tensor var_8056_cast_fp16 = softmax(axis = var_8054, x = attn_weights_55_cast_fp16)[name = string("op_8056_cast_fp16")]; tensor concat_246 = const()[name = string("concat_246"), val = tensor([32, 64, 1024])]; tensor reshape_39_cast_fp16 = reshape(shape = concat_246, x = var_8056_cast_fp16)[name = string("reshape_39_cast_fp16")]; tensor concat_247 = const()[name = string("concat_247"), val = tensor([32, 1024, 128])]; tensor reshape_40_cast_fp16 = reshape(shape = concat_247, x = x_275_cast_fp16)[name = string("reshape_40_cast_fp16")]; bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; tensor concat_251 = const()[name = string("concat_251"), val = tensor([1, 32, 64, 128])]; tensor reshape_41_cast_fp16 = reshape(shape = concat_251, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; tensor var_8068_perm_0 = const()[name = string("op_8068_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8087 = const()[name = string("op_8087"), val = tensor([1, 64, 4096])]; tensor var_8068_cast_fp16 = transpose(perm = var_8068_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_40")]; tensor attn_output_135_cast_fp16 = reshape(shape = var_8087, x = var_8068_cast_fp16)[name = string("attn_output_135_cast_fp16")]; tensor var_8092 = const()[name = string("op_8092"), val = tensor([0, 2, 1])]; string var_8108_pad_type_0 = const()[name = string("op_8108_pad_type_0"), val = string("valid")]; int32 var_8108_groups_0 = const()[name = string("op_8108_groups_0"), val = int32(1)]; tensor var_8108_strides_0 = const()[name = string("op_8108_strides_0"), val = tensor([1])]; tensor var_8108_pad_0 = const()[name = string("op_8108_pad_0"), val = tensor([0, 0])]; tensor var_8108_dilations_0 = const()[name = string("op_8108_dilations_0"), val = tensor([1])]; tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917994240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923237184))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8093_cast_fp16 = transpose(perm = var_8092, x = attn_output_135_cast_fp16)[name = string("transpose_39")]; tensor var_8108_cast_fp16 = conv(dilations = var_8108_dilations_0, groups = var_8108_groups_0, pad = var_8108_pad_0, pad_type = var_8108_pad_type_0, strides = var_8108_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_8093_cast_fp16)[name = string("op_8108_cast_fp16")]; tensor var_8112 = const()[name = string("op_8112"), val = tensor([0, 2, 1])]; tensor attn_output_139_cast_fp16 = transpose(perm = var_8112, x = var_8108_cast_fp16)[name = string("transpose_38")]; tensor hidden_states_83_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor mean_111_axes_0 = const()[name = string("mean_111_axes_0"), val = tensor([-1])]; bool mean_111_keep_dims_0 = const()[name = string("mean_111_keep_dims_0"), val = bool(true)]; tensor mean_111_cast_fp16 = reduce_mean(axes = mean_111_axes_0, keep_dims = mean_111_keep_dims_0, x = hidden_states_83_cast_fp16)[name = string("mean_111_cast_fp16")]; tensor input_245_cast_fp16 = sub(x = hidden_states_83_cast_fp16, y = mean_111_cast_fp16)[name = string("input_245_cast_fp16")]; tensor var_8131_axes_0 = const()[name = string("op_8131_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923247488)))]; fp16 var_8119_to_fp16 = const()[name = string("op_8119_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8131_cast_fp16 = layer_norm(axes = var_8131_axes_0, epsilon = var_8119_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_245_cast_fp16)[name = string("op_8131_cast_fp16")]; tensor var_8145 = const()[name = string("op_8145"), val = tensor([0, 2, 1])]; tensor input_247_axes_0 = const()[name = string("input_247_axes_0"), val = tensor([2])]; tensor var_8146 = transpose(perm = var_8145, x = var_8131_cast_fp16)[name = string("transpose_37")]; tensor input_247 = expand_dims(axes = input_247_axes_0, x = var_8146)[name = string("input_247")]; string input_249_pad_type_0 = const()[name = string("input_249_pad_type_0"), val = string("valid")]; tensor input_249_strides_0 = const()[name = string("input_249_strides_0"), val = tensor([1, 1])]; tensor input_249_pad_0 = const()[name = string("input_249_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_249_dilations_0 = const()[name = string("input_249_dilations_0"), val = tensor([1, 1])]; int32 input_249_groups_0 = const()[name = string("input_249_groups_0"), val = int32(1)]; tensor input_249 = conv(dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_247)[name = string("input_249")]; string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_247)[name = string("b_27")]; tensor c_27 = silu(x = input_249)[name = string("c_27")]; tensor input_251 = mul(x = c_27, y = b_27)[name = string("input_251")]; string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; tensor e_27 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_251)[name = string("e_27")]; tensor var_8168_axes_0 = const()[name = string("op_8168_axes_0"), val = tensor([2])]; tensor var_8168 = squeeze(axes = var_8168_axes_0, x = e_27)[name = string("op_8168")]; tensor var_8169 = const()[name = string("op_8169"), val = tensor([0, 2, 1])]; tensor var_8170 = transpose(perm = var_8169, x = var_8168)[name = string("transpose_36")]; tensor hidden_states_85_cast_fp16 = add(x = hidden_states_83_cast_fp16, y = var_8170)[name = string("hidden_states_85_cast_fp16")]; tensor mean_113_axes_0 = const()[name = string("mean_113_axes_0"), val = tensor([-1])]; bool mean_113_keep_dims_0 = const()[name = string("mean_113_keep_dims_0"), val = bool(true)]; tensor mean_113_cast_fp16 = reduce_mean(axes = mean_113_axes_0, keep_dims = mean_113_keep_dims_0, x = hidden_states_85_cast_fp16)[name = string("mean_113_cast_fp16")]; tensor input_253_cast_fp16 = sub(x = hidden_states_85_cast_fp16, y = mean_113_cast_fp16)[name = string("input_253_cast_fp16")]; tensor var_8188_axes_0 = const()[name = string("op_8188_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923252672)))]; fp16 var_8176_to_fp16 = const()[name = string("op_8176_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8188_cast_fp16 = layer_norm(axes = var_8188_axes_0, epsilon = var_8176_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_253_cast_fp16)[name = string("op_8188_cast_fp16")]; tensor var_8200 = const()[name = string("op_8200"), val = tensor([0, 2, 1])]; tensor var_8203_axes_0 = const()[name = string("op_8203_axes_0"), val = tensor([2])]; tensor var_8201 = transpose(perm = var_8200, x = var_8188_cast_fp16)[name = string("transpose_35")]; tensor var_8203 = expand_dims(axes = var_8203_axes_0, x = var_8201)[name = string("op_8203")]; string query_states_113_pad_type_0 = const()[name = string("query_states_113_pad_type_0"), val = string("valid")]; tensor query_states_113_strides_0 = const()[name = string("query_states_113_strides_0"), val = tensor([1, 1])]; tensor query_states_113_pad_0 = const()[name = string("query_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_113_dilations_0 = const()[name = string("query_states_113_dilations_0"), val = tensor([1, 1])]; int32 query_states_113_groups_0 = const()[name = string("query_states_113_groups_0"), val = int32(1)]; tensor query_states_113 = conv(dilations = query_states_113_dilations_0, groups = query_states_113_groups_0, pad = query_states_113_pad_0, pad_type = query_states_113_pad_type_0, strides = query_states_113_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_8203)[name = string("query_states_113")]; string key_states_141_pad_type_0 = const()[name = string("key_states_141_pad_type_0"), val = string("valid")]; tensor key_states_141_strides_0 = const()[name = string("key_states_141_strides_0"), val = tensor([1, 1])]; tensor key_states_141_pad_0 = const()[name = string("key_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_141_dilations_0 = const()[name = string("key_states_141_dilations_0"), val = tensor([1, 1])]; int32 key_states_141_groups_0 = const()[name = string("key_states_141_groups_0"), val = int32(1)]; tensor key_states_141 = conv(dilations = key_states_141_dilations_0, groups = key_states_141_groups_0, pad = key_states_141_pad_0, pad_type = key_states_141_pad_type_0, strides = key_states_141_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_8203)[name = string("key_states_141")]; string value_states_113_pad_type_0 = const()[name = string("value_states_113_pad_type_0"), val = string("valid")]; tensor value_states_113_strides_0 = const()[name = string("value_states_113_strides_0"), val = tensor([1, 1])]; tensor value_states_113_pad_0 = const()[name = string("value_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_113_dilations_0 = const()[name = string("value_states_113_dilations_0"), val = tensor([1, 1])]; int32 value_states_113_groups_0 = const()[name = string("value_states_113_groups_0"), val = int32(1)]; tensor value_states_113 = conv(dilations = value_states_113_dilations_0, groups = value_states_113_groups_0, pad = value_states_113_pad_0, pad_type = value_states_113_pad_type_0, strides = value_states_113_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_8203)[name = string("value_states_113")]; tensor var_8245 = const()[name = string("op_8245"), val = tensor([1, 32, 128, 64])]; tensor var_8246 = reshape(shape = var_8245, x = query_states_113)[name = string("op_8246")]; tensor var_8251 = const()[name = string("op_8251"), val = tensor([0, 1, 3, 2])]; tensor var_8256 = const()[name = string("op_8256"), val = tensor([1, 8, 128, 64])]; tensor var_8257 = reshape(shape = var_8256, x = key_states_141)[name = string("op_8257")]; tensor var_8262 = const()[name = string("op_8262"), val = tensor([0, 1, 3, 2])]; tensor var_8267 = const()[name = string("op_8267"), val = tensor([1, 8, 128, 64])]; tensor var_8268 = reshape(shape = var_8267, x = value_states_113)[name = string("op_8268")]; tensor var_8273 = const()[name = string("op_8273"), val = tensor([0, 1, 3, 2])]; tensor mean_115_axes_0 = const()[name = string("mean_115_axes_0"), val = tensor([-1])]; bool mean_115_keep_dims_0 = const()[name = string("mean_115_keep_dims_0"), val = bool(true)]; tensor x_281 = transpose(perm = var_8251, x = var_8246)[name = string("transpose_34")]; tensor mean_115 = reduce_mean(axes = mean_115_axes_0, keep_dims = mean_115_keep_dims_0, x = x_281)[name = string("mean_115")]; tensor input_257 = sub(x = x_281, y = mean_115)[name = string("input_257")]; tensor var_8290_axes_0 = const()[name = string("op_8290_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923257856)))]; fp16 var_8278_to_fp16 = const()[name = string("op_8278_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8290_cast_fp16 = layer_norm(axes = var_8290_axes_0, epsilon = var_8278_to_fp16, gamma = model_model_layers_14_self_attn_q_norm_weight_to_fp16, x = input_257)[name = string("op_8290_cast_fp16")]; tensor mean_117_axes_0 = const()[name = string("mean_117_axes_0"), val = tensor([-1])]; bool mean_117_keep_dims_0 = const()[name = string("mean_117_keep_dims_0"), val = bool(true)]; tensor x_283 = transpose(perm = var_8262, x = var_8257)[name = string("transpose_33")]; tensor mean_117 = reduce_mean(axes = mean_117_axes_0, keep_dims = mean_117_keep_dims_0, x = x_283)[name = string("mean_117")]; tensor input_259 = sub(x = x_283, y = mean_117)[name = string("input_259")]; tensor var_8308_axes_0 = const()[name = string("op_8308_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258176)))]; fp16 var_8296_to_fp16 = const()[name = string("op_8296_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8308_cast_fp16 = layer_norm(axes = var_8308_axes_0, epsilon = var_8296_to_fp16, gamma = model_model_layers_14_self_attn_k_norm_weight_to_fp16, x = input_259)[name = string("op_8308_cast_fp16")]; tensor var_8323 = mul(x = var_8290_cast_fp16, y = cos_5)[name = string("op_8323")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_8290_cast_fp16)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_8290_cast_fp16)[name = string("x2_57")]; fp16 const_315_promoted = const()[name = string("const_315_promoted"), val = fp16(-0x1p+0)]; tensor var_8344 = mul(x = x2_57, y = const_315_promoted)[name = string("op_8344")]; int32 var_8346 = const()[name = string("op_8346"), val = int32(-1)]; bool var_8347_interleave_0 = const()[name = string("op_8347_interleave_0"), val = bool(false)]; tensor var_8347 = concat(axis = var_8346, interleave = var_8347_interleave_0, values = (var_8344, x1_57))[name = string("op_8347")]; tensor var_8348 = mul(x = var_8347, y = sin_5)[name = string("op_8348")]; tensor query_states_115 = add(x = var_8323, y = var_8348)[name = string("query_states_115")]; tensor var_8351 = mul(x = var_8308_cast_fp16, y = cos_5)[name = string("op_8351")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_8308_cast_fp16)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_8308_cast_fp16)[name = string("x2_59")]; fp16 const_318_promoted = const()[name = string("const_318_promoted"), val = fp16(-0x1p+0)]; tensor var_8372 = mul(x = x2_59, y = const_318_promoted)[name = string("op_8372")]; int32 var_8374 = const()[name = string("op_8374"), val = int32(-1)]; bool var_8375_interleave_0 = const()[name = string("op_8375_interleave_0"), val = bool(false)]; tensor var_8375 = concat(axis = var_8374, interleave = var_8375_interleave_0, values = (var_8372, x1_59))[name = string("op_8375")]; tensor var_8376 = mul(x = var_8375, y = sin_5)[name = string("op_8376")]; tensor key_states_143 = add(x = var_8351, y = var_8376)[name = string("key_states_143")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([14])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([15])]; int32 concat_254_axis_0 = const()[name = string("concat_254_axis_0"), val = int32(0)]; bool concat_254_interleave_0 = const()[name = string("concat_254_interleave_0"), val = bool(false)]; tensor concat_254 = concat(axis = concat_254_axis_0, interleave = concat_254_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_254")]; tensor concat_255_values1_0 = const()[name = string("concat_255_values1_0"), val = tensor([0])]; tensor concat_255_values3_0 = const()[name = string("concat_255_values3_0"), val = tensor([0])]; int32 concat_255_axis_0 = const()[name = string("concat_255_axis_0"), val = int32(0)]; bool concat_255_interleave_0 = const()[name = string("concat_255_interleave_0"), val = bool(false)]; tensor concat_255 = concat(axis = concat_255_axis_0, interleave = concat_255_interleave_0, values = (expand_dims_172, concat_255_values1_0, var_1230, concat_255_values3_0))[name = string("concat_255")]; tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_254, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_255, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = key_states_143, x = coreml_update_state_63)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_64")]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([50])]; tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([51])]; int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_258")]; tensor concat_259_values1_0 = const()[name = string("concat_259_values1_0"), val = tensor([0])]; tensor concat_259_values3_0 = const()[name = string("concat_259_values3_0"), val = tensor([0])]; int32 concat_259_axis_0 = const()[name = string("concat_259_axis_0"), val = int32(0)]; bool concat_259_interleave_0 = const()[name = string("concat_259_interleave_0"), val = bool(false)]; tensor concat_259 = concat(axis = concat_259_axis_0, interleave = concat_259_interleave_0, values = (expand_dims_178, concat_259_values1_0, var_1230, concat_259_values3_0))[name = string("concat_259")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_115 = transpose(perm = var_8273, x = var_8268)[name = string("transpose_32")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_258, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_259, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = value_states_115, x = coreml_update_state_64)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_65")]; tensor var_8447_begin_0 = const()[name = string("op_8447_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_8447_end_0 = const()[name = string("op_8447_end_0"), val = tensor([15, 8, 1024, 128])]; tensor var_8447_end_mask_0 = const()[name = string("op_8447_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8447_cast_fp16 = slice_by_index(begin = var_8447_begin_0, end = var_8447_end_0, end_mask = var_8447_end_mask_0, x = coreml_update_state_65)[name = string("op_8447_cast_fp16")]; tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_8447_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; tensor var_8454_begin_0 = const()[name = string("op_8454_begin_0"), val = tensor([50, 0, 0, 0])]; tensor var_8454_end_0 = const()[name = string("op_8454_end_0"), val = tensor([51, 8, 1024, 128])]; tensor var_8454_end_mask_0 = const()[name = string("op_8454_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8454_cast_fp16 = slice_by_index(begin = var_8454_begin_0, end = var_8454_end_0, end_mask = var_8454_end_mask_0, x = coreml_update_state_65)[name = string("op_8454_cast_fp16")]; tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_8454_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; tensor x_287_axes_0 = const()[name = string("x_287_axes_0"), val = tensor([1])]; tensor x_287_cast_fp16 = expand_dims(axes = x_287_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_287_cast_fp16")]; tensor var_8483 = const()[name = string("op_8483"), val = tensor([1, 4, 1, 1])]; tensor x_289_cast_fp16 = tile(reps = var_8483, x = x_287_cast_fp16)[name = string("x_289_cast_fp16")]; tensor var_8495 = const()[name = string("op_8495"), val = tensor([1, -1, 1024, 128])]; tensor key_states_147_cast_fp16 = reshape(shape = var_8495, x = x_289_cast_fp16)[name = string("key_states_147_cast_fp16")]; tensor x_293_axes_0 = const()[name = string("x_293_axes_0"), val = tensor([1])]; tensor x_293_cast_fp16 = expand_dims(axes = x_293_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_293_cast_fp16")]; tensor var_8503 = const()[name = string("op_8503"), val = tensor([1, 4, 1, 1])]; tensor x_295_cast_fp16 = tile(reps = var_8503, x = x_293_cast_fp16)[name = string("x_295_cast_fp16")]; bool var_8530_transpose_x_0 = const()[name = string("op_8530_transpose_x_0"), val = bool(false)]; bool var_8530_transpose_y_0 = const()[name = string("op_8530_transpose_y_0"), val = bool(true)]; tensor var_8530 = matmul(transpose_x = var_8530_transpose_x_0, transpose_y = var_8530_transpose_y_0, x = query_states_115, y = key_states_147_cast_fp16)[name = string("op_8530")]; fp16 var_8531_to_fp16 = const()[name = string("op_8531_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_57_cast_fp16 = mul(x = var_8530, y = var_8531_to_fp16)[name = string("attn_weights_57_cast_fp16")]; tensor attn_weights_59_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = causal_mask)[name = string("attn_weights_59_cast_fp16")]; int32 var_8566 = const()[name = string("op_8566"), val = int32(-1)]; tensor var_8568_cast_fp16 = softmax(axis = var_8566, x = attn_weights_59_cast_fp16)[name = string("op_8568_cast_fp16")]; tensor concat_264 = const()[name = string("concat_264"), val = tensor([32, 64, 1024])]; tensor reshape_42_cast_fp16 = reshape(shape = concat_264, x = var_8568_cast_fp16)[name = string("reshape_42_cast_fp16")]; tensor concat_265 = const()[name = string("concat_265"), val = tensor([32, 1024, 128])]; tensor reshape_43_cast_fp16 = reshape(shape = concat_265, x = x_295_cast_fp16)[name = string("reshape_43_cast_fp16")]; bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; tensor concat_269 = const()[name = string("concat_269"), val = tensor([1, 32, 64, 128])]; tensor reshape_44_cast_fp16 = reshape(shape = concat_269, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; tensor var_8580_perm_0 = const()[name = string("op_8580_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8599 = const()[name = string("op_8599"), val = tensor([1, 64, 4096])]; tensor var_8580_cast_fp16 = transpose(perm = var_8580_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_31")]; tensor attn_output_145_cast_fp16 = reshape(shape = var_8599, x = var_8580_cast_fp16)[name = string("attn_output_145_cast_fp16")]; tensor var_8604 = const()[name = string("op_8604"), val = tensor([0, 2, 1])]; string var_8620_pad_type_0 = const()[name = string("op_8620_pad_type_0"), val = string("valid")]; int32 var_8620_groups_0 = const()[name = string("op_8620_groups_0"), val = int32(1)]; tensor var_8620_strides_0 = const()[name = string("op_8620_strides_0"), val = tensor([1])]; tensor var_8620_pad_0 = const()[name = string("op_8620_pad_0"), val = tensor([0, 0])]; tensor var_8620_dilations_0 = const()[name = string("op_8620_dilations_0"), val = tensor([1])]; tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928501440))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8605_cast_fp16 = transpose(perm = var_8604, x = attn_output_145_cast_fp16)[name = string("transpose_30")]; tensor var_8620_cast_fp16 = conv(dilations = var_8620_dilations_0, groups = var_8620_groups_0, pad = var_8620_pad_0, pad_type = var_8620_pad_type_0, strides = var_8620_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_8605_cast_fp16)[name = string("op_8620_cast_fp16")]; tensor var_8624 = const()[name = string("op_8624"), val = tensor([0, 2, 1])]; tensor attn_output_149_cast_fp16 = transpose(perm = var_8624, x = var_8620_cast_fp16)[name = string("transpose_29")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor mean_119_axes_0 = const()[name = string("mean_119_axes_0"), val = tensor([-1])]; bool mean_119_keep_dims_0 = const()[name = string("mean_119_keep_dims_0"), val = bool(true)]; tensor mean_119_cast_fp16 = reduce_mean(axes = mean_119_axes_0, keep_dims = mean_119_keep_dims_0, x = hidden_states_89_cast_fp16)[name = string("mean_119_cast_fp16")]; tensor input_263_cast_fp16 = sub(x = hidden_states_89_cast_fp16, y = mean_119_cast_fp16)[name = string("input_263_cast_fp16")]; tensor var_8643_axes_0 = const()[name = string("op_8643_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928511744)))]; fp16 var_8631_to_fp16 = const()[name = string("op_8631_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8643_cast_fp16 = layer_norm(axes = var_8643_axes_0, epsilon = var_8631_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_263_cast_fp16)[name = string("op_8643_cast_fp16")]; tensor var_8657 = const()[name = string("op_8657"), val = tensor([0, 2, 1])]; tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; tensor var_8658 = transpose(perm = var_8657, x = var_8643_cast_fp16)[name = string("transpose_28")]; tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_8658)[name = string("input_265")]; string input_267_pad_type_0 = const()[name = string("input_267_pad_type_0"), val = string("valid")]; tensor input_267_strides_0 = const()[name = string("input_267_strides_0"), val = tensor([1, 1])]; tensor input_267_pad_0 = const()[name = string("input_267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_267_dilations_0 = const()[name = string("input_267_dilations_0"), val = tensor([1, 1])]; int32 input_267_groups_0 = const()[name = string("input_267_groups_0"), val = int32(1)]; tensor input_267 = conv(dilations = input_267_dilations_0, groups = input_267_groups_0, pad = input_267_pad_0, pad_type = input_267_pad_type_0, strides = input_267_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_265)[name = string("input_267")]; string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_265)[name = string("b_29")]; tensor c_29 = silu(x = input_267)[name = string("c_29")]; tensor input_269 = mul(x = c_29, y = b_29)[name = string("input_269")]; string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; tensor e_29 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_269)[name = string("e_29")]; tensor var_8680_axes_0 = const()[name = string("op_8680_axes_0"), val = tensor([2])]; tensor var_8680 = squeeze(axes = var_8680_axes_0, x = e_29)[name = string("op_8680")]; tensor var_8681 = const()[name = string("op_8681"), val = tensor([0, 2, 1])]; tensor var_8682 = transpose(perm = var_8681, x = var_8680)[name = string("transpose_27")]; tensor hidden_states_91_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = var_8682)[name = string("hidden_states_91_cast_fp16")]; tensor mean_121_axes_0 = const()[name = string("mean_121_axes_0"), val = tensor([-1])]; bool mean_121_keep_dims_0 = const()[name = string("mean_121_keep_dims_0"), val = bool(true)]; tensor mean_121_cast_fp16 = reduce_mean(axes = mean_121_axes_0, keep_dims = mean_121_keep_dims_0, x = hidden_states_91_cast_fp16)[name = string("mean_121_cast_fp16")]; tensor input_271_cast_fp16 = sub(x = hidden_states_91_cast_fp16, y = mean_121_cast_fp16)[name = string("input_271_cast_fp16")]; tensor var_8700_axes_0 = const()[name = string("op_8700_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928516928)))]; fp16 var_8688_to_fp16 = const()[name = string("op_8688_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8700_cast_fp16 = layer_norm(axes = var_8700_axes_0, epsilon = var_8688_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_271_cast_fp16)[name = string("op_8700_cast_fp16")]; tensor var_8712 = const()[name = string("op_8712"), val = tensor([0, 2, 1])]; tensor var_8715_axes_0 = const()[name = string("op_8715_axes_0"), val = tensor([2])]; tensor var_8713 = transpose(perm = var_8712, x = var_8700_cast_fp16)[name = string("transpose_26")]; tensor var_8715 = expand_dims(axes = var_8715_axes_0, x = var_8713)[name = string("op_8715")]; string query_states_121_pad_type_0 = const()[name = string("query_states_121_pad_type_0"), val = string("valid")]; tensor query_states_121_strides_0 = const()[name = string("query_states_121_strides_0"), val = tensor([1, 1])]; tensor query_states_121_pad_0 = const()[name = string("query_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_121_dilations_0 = const()[name = string("query_states_121_dilations_0"), val = tensor([1, 1])]; int32 query_states_121_groups_0 = const()[name = string("query_states_121_groups_0"), val = int32(1)]; tensor query_states_121 = conv(dilations = query_states_121_dilations_0, groups = query_states_121_groups_0, pad = query_states_121_pad_0, pad_type = query_states_121_pad_type_0, strides = query_states_121_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_8715)[name = string("query_states_121")]; string key_states_151_pad_type_0 = const()[name = string("key_states_151_pad_type_0"), val = string("valid")]; tensor key_states_151_strides_0 = const()[name = string("key_states_151_strides_0"), val = tensor([1, 1])]; tensor key_states_151_pad_0 = const()[name = string("key_states_151_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_151_dilations_0 = const()[name = string("key_states_151_dilations_0"), val = tensor([1, 1])]; int32 key_states_151_groups_0 = const()[name = string("key_states_151_groups_0"), val = int32(1)]; tensor key_states_151 = conv(dilations = key_states_151_dilations_0, groups = key_states_151_groups_0, pad = key_states_151_pad_0, pad_type = key_states_151_pad_type_0, strides = key_states_151_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_8715)[name = string("key_states_151")]; string value_states_121_pad_type_0 = const()[name = string("value_states_121_pad_type_0"), val = string("valid")]; tensor value_states_121_strides_0 = const()[name = string("value_states_121_strides_0"), val = tensor([1, 1])]; tensor value_states_121_pad_0 = const()[name = string("value_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_121_dilations_0 = const()[name = string("value_states_121_dilations_0"), val = tensor([1, 1])]; int32 value_states_121_groups_0 = const()[name = string("value_states_121_groups_0"), val = int32(1)]; tensor value_states_121 = conv(dilations = value_states_121_dilations_0, groups = value_states_121_groups_0, pad = value_states_121_pad_0, pad_type = value_states_121_pad_type_0, strides = value_states_121_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_8715)[name = string("value_states_121")]; tensor var_8757 = const()[name = string("op_8757"), val = tensor([1, 32, 128, 64])]; tensor var_8758 = reshape(shape = var_8757, x = query_states_121)[name = string("op_8758")]; tensor var_8763 = const()[name = string("op_8763"), val = tensor([0, 1, 3, 2])]; tensor var_8768 = const()[name = string("op_8768"), val = tensor([1, 8, 128, 64])]; tensor var_8769 = reshape(shape = var_8768, x = key_states_151)[name = string("op_8769")]; tensor var_8774 = const()[name = string("op_8774"), val = tensor([0, 1, 3, 2])]; tensor var_8779 = const()[name = string("op_8779"), val = tensor([1, 8, 128, 64])]; tensor var_8780 = reshape(shape = var_8779, x = value_states_121)[name = string("op_8780")]; tensor var_8785 = const()[name = string("op_8785"), val = tensor([0, 1, 3, 2])]; tensor mean_123_axes_0 = const()[name = string("mean_123_axes_0"), val = tensor([-1])]; bool mean_123_keep_dims_0 = const()[name = string("mean_123_keep_dims_0"), val = bool(true)]; tensor x_301 = transpose(perm = var_8763, x = var_8758)[name = string("transpose_25")]; tensor mean_123 = reduce_mean(axes = mean_123_axes_0, keep_dims = mean_123_keep_dims_0, x = x_301)[name = string("mean_123")]; tensor input_275 = sub(x = x_301, y = mean_123)[name = string("input_275")]; tensor var_8802_axes_0 = const()[name = string("op_8802_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522112)))]; fp16 var_8790_to_fp16 = const()[name = string("op_8790_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8802_cast_fp16 = layer_norm(axes = var_8802_axes_0, epsilon = var_8790_to_fp16, gamma = model_model_layers_15_self_attn_q_norm_weight_to_fp16, x = input_275)[name = string("op_8802_cast_fp16")]; tensor mean_125_axes_0 = const()[name = string("mean_125_axes_0"), val = tensor([-1])]; bool mean_125_keep_dims_0 = const()[name = string("mean_125_keep_dims_0"), val = bool(true)]; tensor x_303 = transpose(perm = var_8774, x = var_8769)[name = string("transpose_24")]; tensor mean_125 = reduce_mean(axes = mean_125_axes_0, keep_dims = mean_125_keep_dims_0, x = x_303)[name = string("mean_125")]; tensor input_277 = sub(x = x_303, y = mean_125)[name = string("input_277")]; tensor var_8820_axes_0 = const()[name = string("op_8820_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522432)))]; fp16 var_8808_to_fp16 = const()[name = string("op_8808_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8820_cast_fp16 = layer_norm(axes = var_8820_axes_0, epsilon = var_8808_to_fp16, gamma = model_model_layers_15_self_attn_k_norm_weight_to_fp16, x = input_277)[name = string("op_8820_cast_fp16")]; tensor var_8835 = mul(x = var_8802_cast_fp16, y = cos_5)[name = string("op_8835")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_8802_cast_fp16)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_8802_cast_fp16)[name = string("x2_61")]; fp16 const_337_promoted = const()[name = string("const_337_promoted"), val = fp16(-0x1p+0)]; tensor var_8856 = mul(x = x2_61, y = const_337_promoted)[name = string("op_8856")]; int32 var_8858 = const()[name = string("op_8858"), val = int32(-1)]; bool var_8859_interleave_0 = const()[name = string("op_8859_interleave_0"), val = bool(false)]; tensor var_8859 = concat(axis = var_8858, interleave = var_8859_interleave_0, values = (var_8856, x1_61))[name = string("op_8859")]; tensor var_8860 = mul(x = var_8859, y = sin_5)[name = string("op_8860")]; tensor query_states_123 = add(x = var_8835, y = var_8860)[name = string("query_states_123")]; tensor var_8863 = mul(x = var_8820_cast_fp16, y = cos_5)[name = string("op_8863")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = var_8820_cast_fp16)[name = string("x1_63")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = var_8820_cast_fp16)[name = string("x2_63")]; fp16 const_340_promoted = const()[name = string("const_340_promoted"), val = fp16(-0x1p+0)]; tensor var_8884 = mul(x = x2_63, y = const_340_promoted)[name = string("op_8884")]; int32 var_8886 = const()[name = string("op_8886"), val = int32(-1)]; bool var_8887_interleave_0 = const()[name = string("op_8887_interleave_0"), val = bool(false)]; tensor var_8887 = concat(axis = var_8886, interleave = var_8887_interleave_0, values = (var_8884, x1_63))[name = string("op_8887")]; tensor var_8888 = mul(x = var_8887, y = sin_5)[name = string("op_8888")]; tensor key_states_153 = add(x = var_8863, y = var_8888)[name = string("key_states_153")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([15])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([16])]; int32 concat_272_axis_0 = const()[name = string("concat_272_axis_0"), val = int32(0)]; bool concat_272_interleave_0 = const()[name = string("concat_272_interleave_0"), val = bool(false)]; tensor concat_272 = concat(axis = concat_272_axis_0, interleave = concat_272_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_272")]; tensor concat_273_values1_0 = const()[name = string("concat_273_values1_0"), val = tensor([0])]; tensor concat_273_values3_0 = const()[name = string("concat_273_values3_0"), val = tensor([0])]; int32 concat_273_axis_0 = const()[name = string("concat_273_axis_0"), val = int32(0)]; bool concat_273_interleave_0 = const()[name = string("concat_273_interleave_0"), val = bool(false)]; tensor concat_273 = concat(axis = concat_273_axis_0, interleave = concat_273_interleave_0, values = (expand_dims_184, concat_273_values1_0, var_1230, concat_273_values3_0))[name = string("concat_273")]; tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_272, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_273, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = key_states_153, x = coreml_update_state_65)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_66")]; tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([51])]; tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([52])]; int32 concat_276_axis_0 = const()[name = string("concat_276_axis_0"), val = int32(0)]; bool concat_276_interleave_0 = const()[name = string("concat_276_interleave_0"), val = bool(false)]; tensor concat_276 = concat(axis = concat_276_axis_0, interleave = concat_276_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_276")]; tensor concat_277_values1_0 = const()[name = string("concat_277_values1_0"), val = tensor([0])]; tensor concat_277_values3_0 = const()[name = string("concat_277_values3_0"), val = tensor([0])]; int32 concat_277_axis_0 = const()[name = string("concat_277_axis_0"), val = int32(0)]; bool concat_277_interleave_0 = const()[name = string("concat_277_interleave_0"), val = bool(false)]; tensor concat_277 = concat(axis = concat_277_axis_0, interleave = concat_277_interleave_0, values = (expand_dims_190, concat_277_values1_0, var_1230, concat_277_values3_0))[name = string("concat_277")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_123 = transpose(perm = var_8785, x = var_8780)[name = string("transpose_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_276, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_277, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = value_states_123, x = coreml_update_state_66)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_67")]; tensor var_8959_begin_0 = const()[name = string("op_8959_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_8959_end_0 = const()[name = string("op_8959_end_0"), val = tensor([16, 8, 1024, 128])]; tensor var_8959_end_mask_0 = const()[name = string("op_8959_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8959_cast_fp16 = slice_by_index(begin = var_8959_begin_0, end = var_8959_end_0, end_mask = var_8959_end_mask_0, x = coreml_update_state_67)[name = string("op_8959_cast_fp16")]; tensor K_layer_cache_31_axes_0 = const()[name = string("K_layer_cache_31_axes_0"), val = tensor([0])]; tensor K_layer_cache_31_cast_fp16 = squeeze(axes = K_layer_cache_31_axes_0, x = var_8959_cast_fp16)[name = string("K_layer_cache_31_cast_fp16")]; tensor var_8966_begin_0 = const()[name = string("op_8966_begin_0"), val = tensor([51, 0, 0, 0])]; tensor var_8966_end_0 = const()[name = string("op_8966_end_0"), val = tensor([52, 8, 1024, 128])]; tensor var_8966_end_mask_0 = const()[name = string("op_8966_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8966_cast_fp16 = slice_by_index(begin = var_8966_begin_0, end = var_8966_end_0, end_mask = var_8966_end_mask_0, x = coreml_update_state_67)[name = string("op_8966_cast_fp16")]; tensor V_layer_cache_31_axes_0 = const()[name = string("V_layer_cache_31_axes_0"), val = tensor([0])]; tensor V_layer_cache_31_cast_fp16 = squeeze(axes = V_layer_cache_31_axes_0, x = var_8966_cast_fp16)[name = string("V_layer_cache_31_cast_fp16")]; tensor x_307_axes_0 = const()[name = string("x_307_axes_0"), val = tensor([1])]; tensor x_307_cast_fp16 = expand_dims(axes = x_307_axes_0, x = K_layer_cache_31_cast_fp16)[name = string("x_307_cast_fp16")]; tensor var_8995 = const()[name = string("op_8995"), val = tensor([1, 4, 1, 1])]; tensor x_309_cast_fp16 = tile(reps = var_8995, x = x_307_cast_fp16)[name = string("x_309_cast_fp16")]; tensor var_9007 = const()[name = string("op_9007"), val = tensor([1, -1, 1024, 128])]; tensor key_states_157_cast_fp16 = reshape(shape = var_9007, x = x_309_cast_fp16)[name = string("key_states_157_cast_fp16")]; tensor x_313_axes_0 = const()[name = string("x_313_axes_0"), val = tensor([1])]; tensor x_313_cast_fp16 = expand_dims(axes = x_313_axes_0, x = V_layer_cache_31_cast_fp16)[name = string("x_313_cast_fp16")]; tensor var_9015 = const()[name = string("op_9015"), val = tensor([1, 4, 1, 1])]; tensor x_315_cast_fp16 = tile(reps = var_9015, x = x_313_cast_fp16)[name = string("x_315_cast_fp16")]; bool var_9042_transpose_x_0 = const()[name = string("op_9042_transpose_x_0"), val = bool(false)]; bool var_9042_transpose_y_0 = const()[name = string("op_9042_transpose_y_0"), val = bool(true)]; tensor var_9042 = matmul(transpose_x = var_9042_transpose_x_0, transpose_y = var_9042_transpose_y_0, x = query_states_123, y = key_states_157_cast_fp16)[name = string("op_9042")]; fp16 var_9043_to_fp16 = const()[name = string("op_9043_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_9042, y = var_9043_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_9078 = const()[name = string("op_9078"), val = int32(-1)]; tensor var_9080_cast_fp16 = softmax(axis = var_9078, x = attn_weights_63_cast_fp16)[name = string("op_9080_cast_fp16")]; tensor concat_282 = const()[name = string("concat_282"), val = tensor([32, 64, 1024])]; tensor reshape_45_cast_fp16 = reshape(shape = concat_282, x = var_9080_cast_fp16)[name = string("reshape_45_cast_fp16")]; tensor concat_283 = const()[name = string("concat_283"), val = tensor([32, 1024, 128])]; tensor reshape_46_cast_fp16 = reshape(shape = concat_283, x = x_315_cast_fp16)[name = string("reshape_46_cast_fp16")]; bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; tensor concat_287 = const()[name = string("concat_287"), val = tensor([1, 32, 64, 128])]; tensor reshape_47_cast_fp16 = reshape(shape = concat_287, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; tensor var_9092_perm_0 = const()[name = string("op_9092_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9111 = const()[name = string("op_9111"), val = tensor([1, 64, 4096])]; tensor var_9092_cast_fp16 = transpose(perm = var_9092_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_22")]; tensor attn_output_155_cast_fp16 = reshape(shape = var_9111, x = var_9092_cast_fp16)[name = string("attn_output_155_cast_fp16")]; tensor var_9116 = const()[name = string("op_9116"), val = tensor([0, 2, 1])]; string var_9132_pad_type_0 = const()[name = string("op_9132_pad_type_0"), val = string("valid")]; int32 var_9132_groups_0 = const()[name = string("op_9132_groups_0"), val = int32(1)]; tensor var_9132_strides_0 = const()[name = string("op_9132_strides_0"), val = tensor([1])]; tensor var_9132_pad_0 = const()[name = string("op_9132_pad_0"), val = tensor([0, 0])]; tensor var_9132_dilations_0 = const()[name = string("op_9132_dilations_0"), val = tensor([1])]; tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933765696))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9117_cast_fp16 = transpose(perm = var_9116, x = attn_output_155_cast_fp16)[name = string("transpose_21")]; tensor var_9132_cast_fp16 = conv(dilations = var_9132_dilations_0, groups = var_9132_groups_0, pad = var_9132_pad_0, pad_type = var_9132_pad_type_0, strides = var_9132_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_9117_cast_fp16)[name = string("op_9132_cast_fp16")]; tensor var_9136 = const()[name = string("op_9136"), val = tensor([0, 2, 1])]; tensor attn_output_159_cast_fp16 = transpose(perm = var_9136, x = var_9132_cast_fp16)[name = string("transpose_20")]; tensor hidden_states_95_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor mean_127_axes_0 = const()[name = string("mean_127_axes_0"), val = tensor([-1])]; bool mean_127_keep_dims_0 = const()[name = string("mean_127_keep_dims_0"), val = bool(true)]; tensor mean_127_cast_fp16 = reduce_mean(axes = mean_127_axes_0, keep_dims = mean_127_keep_dims_0, x = hidden_states_95_cast_fp16)[name = string("mean_127_cast_fp16")]; tensor input_281_cast_fp16 = sub(x = hidden_states_95_cast_fp16, y = mean_127_cast_fp16)[name = string("input_281_cast_fp16")]; tensor var_9155_axes_0 = const()[name = string("op_9155_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933776000)))]; fp16 var_9143_to_fp16 = const()[name = string("op_9143_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9155_cast_fp16 = layer_norm(axes = var_9155_axes_0, epsilon = var_9143_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_281_cast_fp16)[name = string("op_9155_cast_fp16")]; tensor var_9169 = const()[name = string("op_9169"), val = tensor([0, 2, 1])]; tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; tensor var_9170 = transpose(perm = var_9169, x = var_9155_cast_fp16)[name = string("transpose_19")]; tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_9170)[name = string("input_283")]; string input_285_pad_type_0 = const()[name = string("input_285_pad_type_0"), val = string("valid")]; tensor input_285_strides_0 = const()[name = string("input_285_strides_0"), val = tensor([1, 1])]; tensor input_285_pad_0 = const()[name = string("input_285_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_285_dilations_0 = const()[name = string("input_285_dilations_0"), val = tensor([1, 1])]; int32 input_285_groups_0 = const()[name = string("input_285_groups_0"), val = int32(1)]; tensor input_285 = conv(dilations = input_285_dilations_0, groups = input_285_groups_0, pad = input_285_pad_0, pad_type = input_285_pad_type_0, strides = input_285_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_283)[name = string("input_285")]; string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; tensor b_31 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_283)[name = string("b_31")]; tensor c_31 = silu(x = input_285)[name = string("c_31")]; tensor input_287 = mul(x = c_31, y = b_31)[name = string("input_287")]; string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; tensor e_31 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_287)[name = string("e_31")]; tensor var_9192_axes_0 = const()[name = string("op_9192_axes_0"), val = tensor([2])]; tensor var_9192 = squeeze(axes = var_9192_axes_0, x = e_31)[name = string("op_9192")]; tensor var_9193 = const()[name = string("op_9193"), val = tensor([0, 2, 1])]; tensor var_9194 = transpose(perm = var_9193, x = var_9192)[name = string("transpose_18")]; tensor hidden_states_97_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = var_9194)[name = string("hidden_states_97_cast_fp16")]; tensor mean_129_axes_0 = const()[name = string("mean_129_axes_0"), val = tensor([-1])]; bool mean_129_keep_dims_0 = const()[name = string("mean_129_keep_dims_0"), val = bool(true)]; tensor mean_129_cast_fp16 = reduce_mean(axes = mean_129_axes_0, keep_dims = mean_129_keep_dims_0, x = hidden_states_97_cast_fp16)[name = string("mean_129_cast_fp16")]; tensor input_289_cast_fp16 = sub(x = hidden_states_97_cast_fp16, y = mean_129_cast_fp16)[name = string("input_289_cast_fp16")]; tensor var_9212_axes_0 = const()[name = string("op_9212_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933781184)))]; fp16 var_9200_to_fp16 = const()[name = string("op_9200_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9212_cast_fp16 = layer_norm(axes = var_9212_axes_0, epsilon = var_9200_to_fp16, gamma = model_model_layers_16_input_layernorm_weight_to_fp16, x = input_289_cast_fp16)[name = string("op_9212_cast_fp16")]; tensor var_9224 = const()[name = string("op_9224"), val = tensor([0, 2, 1])]; tensor var_9227_axes_0 = const()[name = string("op_9227_axes_0"), val = tensor([2])]; tensor var_9225 = transpose(perm = var_9224, x = var_9212_cast_fp16)[name = string("transpose_17")]; tensor var_9227 = expand_dims(axes = var_9227_axes_0, x = var_9225)[name = string("op_9227")]; string query_states_129_pad_type_0 = const()[name = string("query_states_129_pad_type_0"), val = string("valid")]; tensor query_states_129_strides_0 = const()[name = string("query_states_129_strides_0"), val = tensor([1, 1])]; tensor query_states_129_pad_0 = const()[name = string("query_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_129_dilations_0 = const()[name = string("query_states_129_dilations_0"), val = tensor([1, 1])]; int32 query_states_129_groups_0 = const()[name = string("query_states_129_groups_0"), val = int32(1)]; tensor query_states_129 = conv(dilations = query_states_129_dilations_0, groups = query_states_129_groups_0, pad = query_states_129_pad_0, pad_type = query_states_129_pad_type_0, strides = query_states_129_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_9227)[name = string("query_states_129")]; string key_states_161_pad_type_0 = const()[name = string("key_states_161_pad_type_0"), val = string("valid")]; tensor key_states_161_strides_0 = const()[name = string("key_states_161_strides_0"), val = tensor([1, 1])]; tensor key_states_161_pad_0 = const()[name = string("key_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_161_dilations_0 = const()[name = string("key_states_161_dilations_0"), val = tensor([1, 1])]; int32 key_states_161_groups_0 = const()[name = string("key_states_161_groups_0"), val = int32(1)]; tensor key_states_161 = conv(dilations = key_states_161_dilations_0, groups = key_states_161_groups_0, pad = key_states_161_pad_0, pad_type = key_states_161_pad_type_0, strides = key_states_161_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_9227)[name = string("key_states_161")]; string value_states_129_pad_type_0 = const()[name = string("value_states_129_pad_type_0"), val = string("valid")]; tensor value_states_129_strides_0 = const()[name = string("value_states_129_strides_0"), val = tensor([1, 1])]; tensor value_states_129_pad_0 = const()[name = string("value_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_129_dilations_0 = const()[name = string("value_states_129_dilations_0"), val = tensor([1, 1])]; int32 value_states_129_groups_0 = const()[name = string("value_states_129_groups_0"), val = int32(1)]; tensor value_states_129 = conv(dilations = value_states_129_dilations_0, groups = value_states_129_groups_0, pad = value_states_129_pad_0, pad_type = value_states_129_pad_type_0, strides = value_states_129_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_9227)[name = string("value_states_129")]; tensor var_9269 = const()[name = string("op_9269"), val = tensor([1, 32, 128, 64])]; tensor var_9270 = reshape(shape = var_9269, x = query_states_129)[name = string("op_9270")]; tensor var_9275 = const()[name = string("op_9275"), val = tensor([0, 1, 3, 2])]; tensor var_9280 = const()[name = string("op_9280"), val = tensor([1, 8, 128, 64])]; tensor var_9281 = reshape(shape = var_9280, x = key_states_161)[name = string("op_9281")]; tensor var_9286 = const()[name = string("op_9286"), val = tensor([0, 1, 3, 2])]; tensor var_9291 = const()[name = string("op_9291"), val = tensor([1, 8, 128, 64])]; tensor var_9292 = reshape(shape = var_9291, x = value_states_129)[name = string("op_9292")]; tensor var_9297 = const()[name = string("op_9297"), val = tensor([0, 1, 3, 2])]; tensor mean_131_axes_0 = const()[name = string("mean_131_axes_0"), val = tensor([-1])]; bool mean_131_keep_dims_0 = const()[name = string("mean_131_keep_dims_0"), val = bool(true)]; tensor x_321 = transpose(perm = var_9275, x = var_9270)[name = string("transpose_16")]; tensor mean_131 = reduce_mean(axes = mean_131_axes_0, keep_dims = mean_131_keep_dims_0, x = x_321)[name = string("mean_131")]; tensor input_293 = sub(x = x_321, y = mean_131)[name = string("input_293")]; tensor var_9314_axes_0 = const()[name = string("op_9314_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933786368)))]; fp16 var_9302_to_fp16 = const()[name = string("op_9302_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9314_cast_fp16 = layer_norm(axes = var_9314_axes_0, epsilon = var_9302_to_fp16, gamma = model_model_layers_16_self_attn_q_norm_weight_to_fp16, x = input_293)[name = string("op_9314_cast_fp16")]; tensor mean_133_axes_0 = const()[name = string("mean_133_axes_0"), val = tensor([-1])]; bool mean_133_keep_dims_0 = const()[name = string("mean_133_keep_dims_0"), val = bool(true)]; tensor x_323 = transpose(perm = var_9286, x = var_9281)[name = string("transpose_15")]; tensor mean_133 = reduce_mean(axes = mean_133_axes_0, keep_dims = mean_133_keep_dims_0, x = x_323)[name = string("mean_133")]; tensor input_295 = sub(x = x_323, y = mean_133)[name = string("input_295")]; tensor var_9332_axes_0 = const()[name = string("op_9332_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933786688)))]; fp16 var_9320_to_fp16 = const()[name = string("op_9320_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9332_cast_fp16 = layer_norm(axes = var_9332_axes_0, epsilon = var_9320_to_fp16, gamma = model_model_layers_16_self_attn_k_norm_weight_to_fp16, x = input_295)[name = string("op_9332_cast_fp16")]; tensor var_9347 = mul(x = var_9314_cast_fp16, y = cos_5)[name = string("op_9347")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = var_9314_cast_fp16)[name = string("x1_65")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = var_9314_cast_fp16)[name = string("x2_65")]; fp16 const_359_promoted = const()[name = string("const_359_promoted"), val = fp16(-0x1p+0)]; tensor var_9368 = mul(x = x2_65, y = const_359_promoted)[name = string("op_9368")]; int32 var_9370 = const()[name = string("op_9370"), val = int32(-1)]; bool var_9371_interleave_0 = const()[name = string("op_9371_interleave_0"), val = bool(false)]; tensor var_9371 = concat(axis = var_9370, interleave = var_9371_interleave_0, values = (var_9368, x1_65))[name = string("op_9371")]; tensor var_9372 = mul(x = var_9371, y = sin_5)[name = string("op_9372")]; tensor query_states_131 = add(x = var_9347, y = var_9372)[name = string("query_states_131")]; tensor var_9375 = mul(x = var_9332_cast_fp16, y = cos_5)[name = string("op_9375")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = var_9332_cast_fp16)[name = string("x1_67")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = var_9332_cast_fp16)[name = string("x2_67")]; fp16 const_362_promoted = const()[name = string("const_362_promoted"), val = fp16(-0x1p+0)]; tensor var_9396 = mul(x = x2_67, y = const_362_promoted)[name = string("op_9396")]; int32 var_9398 = const()[name = string("op_9398"), val = int32(-1)]; bool var_9399_interleave_0 = const()[name = string("op_9399_interleave_0"), val = bool(false)]; tensor var_9399 = concat(axis = var_9398, interleave = var_9399_interleave_0, values = (var_9396, x1_67))[name = string("op_9399")]; tensor var_9400 = mul(x = var_9399, y = sin_5)[name = string("op_9400")]; tensor key_states_163 = add(x = var_9375, y = var_9400)[name = string("key_states_163")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([16])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([17])]; int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_290")]; tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (expand_dims_196, concat_291_values1_0, var_1230, concat_291_values3_0))[name = string("concat_291")]; tensor model_model_kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_290, begin_mask = model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_291, end_mask = model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_33_stride_0, update = key_states_163, x = coreml_update_state_67)[name = string("model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_68")]; tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([52])]; tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([53])]; int32 concat_294_axis_0 = const()[name = string("concat_294_axis_0"), val = int32(0)]; bool concat_294_interleave_0 = const()[name = string("concat_294_interleave_0"), val = bool(false)]; tensor concat_294 = concat(axis = concat_294_axis_0, interleave = concat_294_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_294")]; tensor concat_295_values1_0 = const()[name = string("concat_295_values1_0"), val = tensor([0])]; tensor concat_295_values3_0 = const()[name = string("concat_295_values3_0"), val = tensor([0])]; int32 concat_295_axis_0 = const()[name = string("concat_295_axis_0"), val = int32(0)]; bool concat_295_interleave_0 = const()[name = string("concat_295_interleave_0"), val = bool(false)]; tensor concat_295 = concat(axis = concat_295_axis_0, interleave = concat_295_interleave_0, values = (expand_dims_202, concat_295_values1_0, var_1230, concat_295_values3_0))[name = string("concat_295")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_131 = transpose(perm = var_9297, x = var_9292)[name = string("transpose_14")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_294, begin_mask = model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_295, end_mask = model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_34_stride_0, update = value_states_131, x = coreml_update_state_68)[name = string("model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_69")]; tensor var_9471_begin_0 = const()[name = string("op_9471_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_9471_end_0 = const()[name = string("op_9471_end_0"), val = tensor([17, 8, 1024, 128])]; tensor var_9471_end_mask_0 = const()[name = string("op_9471_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9471_cast_fp16 = slice_by_index(begin = var_9471_begin_0, end = var_9471_end_0, end_mask = var_9471_end_mask_0, x = coreml_update_state_69)[name = string("op_9471_cast_fp16")]; tensor K_layer_cache_33_axes_0 = const()[name = string("K_layer_cache_33_axes_0"), val = tensor([0])]; tensor K_layer_cache_33_cast_fp16 = squeeze(axes = K_layer_cache_33_axes_0, x = var_9471_cast_fp16)[name = string("K_layer_cache_33_cast_fp16")]; tensor var_9478_begin_0 = const()[name = string("op_9478_begin_0"), val = tensor([52, 0, 0, 0])]; tensor var_9478_end_0 = const()[name = string("op_9478_end_0"), val = tensor([53, 8, 1024, 128])]; tensor var_9478_end_mask_0 = const()[name = string("op_9478_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9478_cast_fp16 = slice_by_index(begin = var_9478_begin_0, end = var_9478_end_0, end_mask = var_9478_end_mask_0, x = coreml_update_state_69)[name = string("op_9478_cast_fp16")]; tensor V_layer_cache_33_axes_0 = const()[name = string("V_layer_cache_33_axes_0"), val = tensor([0])]; tensor V_layer_cache_33_cast_fp16 = squeeze(axes = V_layer_cache_33_axes_0, x = var_9478_cast_fp16)[name = string("V_layer_cache_33_cast_fp16")]; tensor x_327_axes_0 = const()[name = string("x_327_axes_0"), val = tensor([1])]; tensor x_327_cast_fp16 = expand_dims(axes = x_327_axes_0, x = K_layer_cache_33_cast_fp16)[name = string("x_327_cast_fp16")]; tensor var_9507 = const()[name = string("op_9507"), val = tensor([1, 4, 1, 1])]; tensor x_329_cast_fp16 = tile(reps = var_9507, x = x_327_cast_fp16)[name = string("x_329_cast_fp16")]; tensor var_9519 = const()[name = string("op_9519"), val = tensor([1, -1, 1024, 128])]; tensor key_states_167_cast_fp16 = reshape(shape = var_9519, x = x_329_cast_fp16)[name = string("key_states_167_cast_fp16")]; tensor x_333_axes_0 = const()[name = string("x_333_axes_0"), val = tensor([1])]; tensor x_333_cast_fp16 = expand_dims(axes = x_333_axes_0, x = V_layer_cache_33_cast_fp16)[name = string("x_333_cast_fp16")]; tensor var_9527 = const()[name = string("op_9527"), val = tensor([1, 4, 1, 1])]; tensor x_335_cast_fp16 = tile(reps = var_9527, x = x_333_cast_fp16)[name = string("x_335_cast_fp16")]; bool var_9554_transpose_x_0 = const()[name = string("op_9554_transpose_x_0"), val = bool(false)]; bool var_9554_transpose_y_0 = const()[name = string("op_9554_transpose_y_0"), val = bool(true)]; tensor var_9554 = matmul(transpose_x = var_9554_transpose_x_0, transpose_y = var_9554_transpose_y_0, x = query_states_131, y = key_states_167_cast_fp16)[name = string("op_9554")]; fp16 var_9555_to_fp16 = const()[name = string("op_9555_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_65_cast_fp16 = mul(x = var_9554, y = var_9555_to_fp16)[name = string("attn_weights_65_cast_fp16")]; tensor attn_weights_67_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = causal_mask)[name = string("attn_weights_67_cast_fp16")]; int32 var_9590 = const()[name = string("op_9590"), val = int32(-1)]; tensor var_9592_cast_fp16 = softmax(axis = var_9590, x = attn_weights_67_cast_fp16)[name = string("op_9592_cast_fp16")]; tensor concat_300 = const()[name = string("concat_300"), val = tensor([32, 64, 1024])]; tensor reshape_48_cast_fp16 = reshape(shape = concat_300, x = var_9592_cast_fp16)[name = string("reshape_48_cast_fp16")]; tensor concat_301 = const()[name = string("concat_301"), val = tensor([32, 1024, 128])]; tensor reshape_49_cast_fp16 = reshape(shape = concat_301, x = x_335_cast_fp16)[name = string("reshape_49_cast_fp16")]; bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(false)]; tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = reshape_48_cast_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; tensor concat_305 = const()[name = string("concat_305"), val = tensor([1, 32, 64, 128])]; tensor reshape_50_cast_fp16 = reshape(shape = concat_305, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; tensor var_9604_perm_0 = const()[name = string("op_9604_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9623 = const()[name = string("op_9623"), val = tensor([1, 64, 4096])]; tensor var_9604_cast_fp16 = transpose(perm = var_9604_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_13")]; tensor attn_output_165_cast_fp16 = reshape(shape = var_9623, x = var_9604_cast_fp16)[name = string("attn_output_165_cast_fp16")]; tensor var_9628 = const()[name = string("op_9628"), val = tensor([0, 2, 1])]; string var_9644_pad_type_0 = const()[name = string("op_9644_pad_type_0"), val = string("valid")]; int32 var_9644_groups_0 = const()[name = string("op_9644_groups_0"), val = int32(1)]; tensor var_9644_strides_0 = const()[name = string("op_9644_strides_0"), val = tensor([1])]; tensor var_9644_pad_0 = const()[name = string("op_9644_pad_0"), val = tensor([0, 0])]; tensor var_9644_dilations_0 = const()[name = string("op_9644_dilations_0"), val = tensor([1])]; tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933787008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939029952))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9629_cast_fp16 = transpose(perm = var_9628, x = attn_output_165_cast_fp16)[name = string("transpose_12")]; tensor var_9644_cast_fp16 = conv(dilations = var_9644_dilations_0, groups = var_9644_groups_0, pad = var_9644_pad_0, pad_type = var_9644_pad_type_0, strides = var_9644_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_9629_cast_fp16)[name = string("op_9644_cast_fp16")]; tensor var_9648 = const()[name = string("op_9648"), val = tensor([0, 2, 1])]; tensor attn_output_169_cast_fp16 = transpose(perm = var_9648, x = var_9644_cast_fp16)[name = string("transpose_11")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor mean_135_axes_0 = const()[name = string("mean_135_axes_0"), val = tensor([-1])]; bool mean_135_keep_dims_0 = const()[name = string("mean_135_keep_dims_0"), val = bool(true)]; tensor mean_135_cast_fp16 = reduce_mean(axes = mean_135_axes_0, keep_dims = mean_135_keep_dims_0, x = hidden_states_101_cast_fp16)[name = string("mean_135_cast_fp16")]; tensor input_299_cast_fp16 = sub(x = hidden_states_101_cast_fp16, y = mean_135_cast_fp16)[name = string("input_299_cast_fp16")]; tensor var_9667_axes_0 = const()[name = string("op_9667_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939040256)))]; fp16 var_9655_to_fp16 = const()[name = string("op_9655_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9667_cast_fp16 = layer_norm(axes = var_9667_axes_0, epsilon = var_9655_to_fp16, gamma = model_model_layers_16_post_attention_layernorm_weight_to_fp16, x = input_299_cast_fp16)[name = string("op_9667_cast_fp16")]; tensor var_9681 = const()[name = string("op_9681"), val = tensor([0, 2, 1])]; tensor input_301_axes_0 = const()[name = string("input_301_axes_0"), val = tensor([2])]; tensor var_9682 = transpose(perm = var_9681, x = var_9667_cast_fp16)[name = string("transpose_10")]; tensor input_301 = expand_dims(axes = input_301_axes_0, x = var_9682)[name = string("input_301")]; string input_303_pad_type_0 = const()[name = string("input_303_pad_type_0"), val = string("valid")]; tensor input_303_strides_0 = const()[name = string("input_303_strides_0"), val = tensor([1, 1])]; tensor input_303_pad_0 = const()[name = string("input_303_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_303_dilations_0 = const()[name = string("input_303_dilations_0"), val = tensor([1, 1])]; int32 input_303_groups_0 = const()[name = string("input_303_groups_0"), val = int32(1)]; tensor input_303 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_301)[name = string("input_303")]; string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; tensor b_33 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_301)[name = string("b_33")]; tensor c_33 = silu(x = input_303)[name = string("c_33")]; tensor input_305 = mul(x = c_33, y = b_33)[name = string("input_305")]; string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; tensor e_33 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_305)[name = string("e_33")]; tensor var_9704_axes_0 = const()[name = string("op_9704_axes_0"), val = tensor([2])]; tensor var_9704 = squeeze(axes = var_9704_axes_0, x = e_33)[name = string("op_9704")]; tensor var_9705 = const()[name = string("op_9705"), val = tensor([0, 2, 1])]; tensor var_9706 = transpose(perm = var_9705, x = var_9704)[name = string("transpose_9")]; tensor hidden_states_103_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = var_9706)[name = string("hidden_states_103_cast_fp16")]; tensor mean_137_axes_0 = const()[name = string("mean_137_axes_0"), val = tensor([-1])]; bool mean_137_keep_dims_0 = const()[name = string("mean_137_keep_dims_0"), val = bool(true)]; tensor mean_137_cast_fp16 = reduce_mean(axes = mean_137_axes_0, keep_dims = mean_137_keep_dims_0, x = hidden_states_103_cast_fp16)[name = string("mean_137_cast_fp16")]; tensor input_307_cast_fp16 = sub(x = hidden_states_103_cast_fp16, y = mean_137_cast_fp16)[name = string("input_307_cast_fp16")]; tensor var_9724_axes_0 = const()[name = string("op_9724_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939045440)))]; fp16 var_9712_to_fp16 = const()[name = string("op_9712_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9724_cast_fp16 = layer_norm(axes = var_9724_axes_0, epsilon = var_9712_to_fp16, gamma = model_model_layers_17_input_layernorm_weight_to_fp16, x = input_307_cast_fp16)[name = string("op_9724_cast_fp16")]; tensor var_9736 = const()[name = string("op_9736"), val = tensor([0, 2, 1])]; tensor var_9739_axes_0 = const()[name = string("op_9739_axes_0"), val = tensor([2])]; tensor var_9737 = transpose(perm = var_9736, x = var_9724_cast_fp16)[name = string("transpose_8")]; tensor var_9739 = expand_dims(axes = var_9739_axes_0, x = var_9737)[name = string("op_9739")]; string query_states_137_pad_type_0 = const()[name = string("query_states_137_pad_type_0"), val = string("valid")]; tensor query_states_137_strides_0 = const()[name = string("query_states_137_strides_0"), val = tensor([1, 1])]; tensor query_states_137_pad_0 = const()[name = string("query_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_137_dilations_0 = const()[name = string("query_states_137_dilations_0"), val = tensor([1, 1])]; int32 query_states_137_groups_0 = const()[name = string("query_states_137_groups_0"), val = int32(1)]; tensor query_states_137 = conv(dilations = query_states_137_dilations_0, groups = query_states_137_groups_0, pad = query_states_137_pad_0, pad_type = query_states_137_pad_type_0, strides = query_states_137_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_9739)[name = string("query_states_137")]; string key_states_171_pad_type_0 = const()[name = string("key_states_171_pad_type_0"), val = string("valid")]; tensor key_states_171_strides_0 = const()[name = string("key_states_171_strides_0"), val = tensor([1, 1])]; tensor key_states_171_pad_0 = const()[name = string("key_states_171_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_171_dilations_0 = const()[name = string("key_states_171_dilations_0"), val = tensor([1, 1])]; int32 key_states_171_groups_0 = const()[name = string("key_states_171_groups_0"), val = int32(1)]; tensor key_states_171 = conv(dilations = key_states_171_dilations_0, groups = key_states_171_groups_0, pad = key_states_171_pad_0, pad_type = key_states_171_pad_type_0, strides = key_states_171_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_9739)[name = string("key_states_171")]; string value_states_137_pad_type_0 = const()[name = string("value_states_137_pad_type_0"), val = string("valid")]; tensor value_states_137_strides_0 = const()[name = string("value_states_137_strides_0"), val = tensor([1, 1])]; tensor value_states_137_pad_0 = const()[name = string("value_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_137_dilations_0 = const()[name = string("value_states_137_dilations_0"), val = tensor([1, 1])]; int32 value_states_137_groups_0 = const()[name = string("value_states_137_groups_0"), val = int32(1)]; tensor value_states_137 = conv(dilations = value_states_137_dilations_0, groups = value_states_137_groups_0, pad = value_states_137_pad_0, pad_type = value_states_137_pad_type_0, strides = value_states_137_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_9739)[name = string("value_states_137")]; tensor var_9781 = const()[name = string("op_9781"), val = tensor([1, 32, 128, 64])]; tensor var_9782 = reshape(shape = var_9781, x = query_states_137)[name = string("op_9782")]; tensor var_9787 = const()[name = string("op_9787"), val = tensor([0, 1, 3, 2])]; tensor var_9792 = const()[name = string("op_9792"), val = tensor([1, 8, 128, 64])]; tensor var_9793 = reshape(shape = var_9792, x = key_states_171)[name = string("op_9793")]; tensor var_9798 = const()[name = string("op_9798"), val = tensor([0, 1, 3, 2])]; tensor var_9803 = const()[name = string("op_9803"), val = tensor([1, 8, 128, 64])]; tensor var_9804 = reshape(shape = var_9803, x = value_states_137)[name = string("op_9804")]; tensor var_9809 = const()[name = string("op_9809"), val = tensor([0, 1, 3, 2])]; tensor mean_139_axes_0 = const()[name = string("mean_139_axes_0"), val = tensor([-1])]; bool mean_139_keep_dims_0 = const()[name = string("mean_139_keep_dims_0"), val = bool(true)]; tensor x_341 = transpose(perm = var_9787, x = var_9782)[name = string("transpose_7")]; tensor mean_139 = reduce_mean(axes = mean_139_axes_0, keep_dims = mean_139_keep_dims_0, x = x_341)[name = string("mean_139")]; tensor input_311 = sub(x = x_341, y = mean_139)[name = string("input_311")]; tensor var_9826_axes_0 = const()[name = string("op_9826_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939050624)))]; fp16 var_9814_to_fp16 = const()[name = string("op_9814_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9826_cast_fp16 = layer_norm(axes = var_9826_axes_0, epsilon = var_9814_to_fp16, gamma = model_model_layers_17_self_attn_q_norm_weight_to_fp16, x = input_311)[name = string("op_9826_cast_fp16")]; tensor mean_141_axes_0 = const()[name = string("mean_141_axes_0"), val = tensor([-1])]; bool mean_141_keep_dims_0 = const()[name = string("mean_141_keep_dims_0"), val = bool(true)]; tensor x_343 = transpose(perm = var_9798, x = var_9793)[name = string("transpose_6")]; tensor mean_141 = reduce_mean(axes = mean_141_axes_0, keep_dims = mean_141_keep_dims_0, x = x_343)[name = string("mean_141")]; tensor input_313 = sub(x = x_343, y = mean_141)[name = string("input_313")]; tensor var_9844_axes_0 = const()[name = string("op_9844_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939050944)))]; fp16 var_9832_to_fp16 = const()[name = string("op_9832_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9844_cast_fp16 = layer_norm(axes = var_9844_axes_0, epsilon = var_9832_to_fp16, gamma = model_model_layers_17_self_attn_k_norm_weight_to_fp16, x = input_313)[name = string("op_9844_cast_fp16")]; tensor var_9859 = mul(x = var_9826_cast_fp16, y = cos_5)[name = string("op_9859")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = var_9826_cast_fp16)[name = string("x1_69")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = var_9826_cast_fp16)[name = string("x2_69")]; fp16 const_381_promoted = const()[name = string("const_381_promoted"), val = fp16(-0x1p+0)]; tensor var_9880 = mul(x = x2_69, y = const_381_promoted)[name = string("op_9880")]; int32 var_9882 = const()[name = string("op_9882"), val = int32(-1)]; bool var_9883_interleave_0 = const()[name = string("op_9883_interleave_0"), val = bool(false)]; tensor var_9883 = concat(axis = var_9882, interleave = var_9883_interleave_0, values = (var_9880, x1_69))[name = string("op_9883")]; tensor var_9884 = mul(x = var_9883, y = sin_5)[name = string("op_9884")]; tensor query_states_139 = add(x = var_9859, y = var_9884)[name = string("query_states_139")]; tensor var_9887 = mul(x = var_9844_cast_fp16, y = cos_5)[name = string("op_9887")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_9844_cast_fp16)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_9844_cast_fp16)[name = string("x2")]; fp16 const_384_promoted = const()[name = string("const_384_promoted"), val = fp16(-0x1p+0)]; tensor var_9908 = mul(x = x2, y = const_384_promoted)[name = string("op_9908")]; int32 var_9910 = const()[name = string("op_9910"), val = int32(-1)]; bool var_9911_interleave_0 = const()[name = string("op_9911_interleave_0"), val = bool(false)]; tensor var_9911 = concat(axis = var_9910, interleave = var_9911_interleave_0, values = (var_9908, x1))[name = string("op_9911")]; tensor var_9912 = mul(x = var_9911, y = sin_5)[name = string("op_9912")]; tensor key_states_173 = add(x = var_9887, y = var_9912)[name = string("key_states_173")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([17])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([18])]; int32 concat_308_axis_0 = const()[name = string("concat_308_axis_0"), val = int32(0)]; bool concat_308_interleave_0 = const()[name = string("concat_308_interleave_0"), val = bool(false)]; tensor concat_308 = concat(axis = concat_308_axis_0, interleave = concat_308_interleave_0, values = (expand_dims_204, expand_dims_205, current_pos, expand_dims_207))[name = string("concat_308")]; tensor concat_309_values1_0 = const()[name = string("concat_309_values1_0"), val = tensor([0])]; tensor concat_309_values3_0 = const()[name = string("concat_309_values3_0"), val = tensor([0])]; int32 concat_309_axis_0 = const()[name = string("concat_309_axis_0"), val = int32(0)]; bool concat_309_interleave_0 = const()[name = string("concat_309_interleave_0"), val = bool(false)]; tensor concat_309 = concat(axis = concat_309_axis_0, interleave = concat_309_interleave_0, values = (expand_dims_208, concat_309_values1_0, var_1230, concat_309_values3_0))[name = string("concat_309")]; tensor model_model_kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_308, begin_mask = model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_309, end_mask = model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_35_stride_0, update = key_states_173, x = coreml_update_state_69)[name = string("model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_70")]; tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([53])]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([0])]; tensor expand_dims_213 = const()[name = string("expand_dims_213"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([54])]; int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (expand_dims_210, expand_dims_211, current_pos, expand_dims_213))[name = string("concat_312")]; tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (expand_dims_214, concat_313_values1_0, var_1230, concat_313_values3_0))[name = string("concat_313")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_139 = transpose(perm = var_9809, x = var_9804)[name = string("transpose_5")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_312, begin_mask = model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_313, end_mask = model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_36_stride_0, update = value_states_139, x = coreml_update_state_70)[name = string("model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_71")]; tensor var_9983_begin_0 = const()[name = string("op_9983_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_9983_end_0 = const()[name = string("op_9983_end_0"), val = tensor([18, 8, 1024, 128])]; tensor var_9983_end_mask_0 = const()[name = string("op_9983_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9983_cast_fp16 = slice_by_index(begin = var_9983_begin_0, end = var_9983_end_0, end_mask = var_9983_end_mask_0, x = coreml_update_state_71)[name = string("op_9983_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_9983_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_9990_begin_0 = const()[name = string("op_9990_begin_0"), val = tensor([53, 0, 0, 0])]; tensor var_9990_end_0 = const()[name = string("op_9990_end_0"), val = tensor([54, 8, 1024, 128])]; tensor var_9990_end_mask_0 = const()[name = string("op_9990_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9990_cast_fp16 = slice_by_index(begin = var_9990_begin_0, end = var_9990_end_0, end_mask = var_9990_end_mask_0, x = coreml_update_state_71)[name = string("op_9990_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_9990_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_347_axes_0 = const()[name = string("x_347_axes_0"), val = tensor([1])]; tensor x_347_cast_fp16 = expand_dims(axes = x_347_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_347_cast_fp16")]; tensor var_10019 = const()[name = string("op_10019"), val = tensor([1, 4, 1, 1])]; tensor x_349_cast_fp16 = tile(reps = var_10019, x = x_347_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_10031 = const()[name = string("op_10031"), val = tensor([1, -1, 1024, 128])]; tensor key_states_177_cast_fp16 = reshape(shape = var_10031, x = x_349_cast_fp16)[name = string("key_states_177_cast_fp16")]; tensor x_353_axes_0 = const()[name = string("x_353_axes_0"), val = tensor([1])]; tensor x_353_cast_fp16 = expand_dims(axes = x_353_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_353_cast_fp16")]; tensor var_10039 = const()[name = string("op_10039"), val = tensor([1, 4, 1, 1])]; tensor x_355_cast_fp16 = tile(reps = var_10039, x = x_353_cast_fp16)[name = string("x_355_cast_fp16")]; bool var_10066_transpose_x_0 = const()[name = string("op_10066_transpose_x_0"), val = bool(false)]; bool var_10066_transpose_y_0 = const()[name = string("op_10066_transpose_y_0"), val = bool(true)]; tensor var_10066 = matmul(transpose_x = var_10066_transpose_x_0, transpose_y = var_10066_transpose_y_0, x = query_states_139, y = key_states_177_cast_fp16)[name = string("op_10066")]; fp16 var_10067_to_fp16 = const()[name = string("op_10067_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_69_cast_fp16 = mul(x = var_10066, y = var_10067_to_fp16)[name = string("attn_weights_69_cast_fp16")]; tensor attn_weights_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask)[name = string("attn_weights_cast_fp16")]; int32 var_10102 = const()[name = string("op_10102"), val = int32(-1)]; tensor var_10104_cast_fp16 = softmax(axis = var_10102, x = attn_weights_cast_fp16)[name = string("op_10104_cast_fp16")]; tensor concat_318 = const()[name = string("concat_318"), val = tensor([32, 64, 1024])]; tensor reshape_51_cast_fp16 = reshape(shape = concat_318, x = var_10104_cast_fp16)[name = string("reshape_51_cast_fp16")]; tensor concat_319 = const()[name = string("concat_319"), val = tensor([32, 1024, 128])]; tensor reshape_52_cast_fp16 = reshape(shape = concat_319, x = x_355_cast_fp16)[name = string("reshape_52_cast_fp16")]; bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(false)]; tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = reshape_51_cast_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; tensor concat_323 = const()[name = string("concat_323"), val = tensor([1, 32, 64, 128])]; tensor reshape_53_cast_fp16 = reshape(shape = concat_323, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; tensor var_10116_perm_0 = const()[name = string("op_10116_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10135 = const()[name = string("op_10135"), val = tensor([1, 64, 4096])]; tensor var_10116_cast_fp16 = transpose(perm = var_10116_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_4")]; tensor attn_output_175_cast_fp16 = reshape(shape = var_10135, x = var_10116_cast_fp16)[name = string("attn_output_175_cast_fp16")]; tensor var_10140 = const()[name = string("op_10140"), val = tensor([0, 2, 1])]; string var_10156_pad_type_0 = const()[name = string("op_10156_pad_type_0"), val = string("valid")]; int32 var_10156_groups_0 = const()[name = string("op_10156_groups_0"), val = int32(1)]; tensor var_10156_strides_0 = const()[name = string("op_10156_strides_0"), val = tensor([1])]; tensor var_10156_pad_0 = const()[name = string("op_10156_pad_0"), val = tensor([0, 0])]; tensor var_10156_dilations_0 = const()[name = string("op_10156_dilations_0"), val = tensor([1])]; tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939051264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944294208))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_10141_cast_fp16 = transpose(perm = var_10140, x = attn_output_175_cast_fp16)[name = string("transpose_3")]; tensor var_10156_cast_fp16 = conv(dilations = var_10156_dilations_0, groups = var_10156_groups_0, pad = var_10156_pad_0, pad_type = var_10156_pad_type_0, strides = var_10156_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_10141_cast_fp16)[name = string("op_10156_cast_fp16")]; tensor var_10160 = const()[name = string("op_10160"), val = tensor([0, 2, 1])]; tensor attn_output_cast_fp16 = transpose(perm = var_10160, x = var_10156_cast_fp16)[name = string("transpose_2")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_103_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_317_cast_fp16 = sub(x = hidden_states_cast_fp16, y = mean_cast_fp16)[name = string("input_317_cast_fp16")]; tensor var_10179_axes_0 = const()[name = string("op_10179_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944304512)))]; fp16 var_10167_to_fp16 = const()[name = string("op_10167_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10179_cast_fp16 = layer_norm(axes = var_10179_axes_0, epsilon = var_10167_to_fp16, gamma = model_model_layers_17_post_attention_layernorm_weight_to_fp16, x = input_317_cast_fp16)[name = string("op_10179_cast_fp16")]; tensor var_10193 = const()[name = string("op_10193"), val = tensor([0, 2, 1])]; tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; tensor var_10194 = transpose(perm = var_10193, x = var_10179_cast_fp16)[name = string("transpose_1")]; tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_10194)[name = string("input_319")]; string input_321_pad_type_0 = const()[name = string("input_321_pad_type_0"), val = string("valid")]; tensor input_321_strides_0 = const()[name = string("input_321_strides_0"), val = tensor([1, 1])]; tensor input_321_pad_0 = const()[name = string("input_321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_321_dilations_0 = const()[name = string("input_321_dilations_0"), val = tensor([1, 1])]; int32 input_321_groups_0 = const()[name = string("input_321_groups_0"), val = int32(1)]; tensor input_321 = conv(dilations = input_321_dilations_0, groups = input_321_groups_0, pad = input_321_pad_0, pad_type = input_321_pad_type_0, strides = input_321_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_319)[name = string("input_321")]; string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_319)[name = string("b")]; tensor c = silu(x = input_321)[name = string("c")]; tensor input = mul(x = c, y = b)[name = string("input")]; string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; tensor e = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input)[name = string("e")]; tensor var_10216_axes_0 = const()[name = string("op_10216_axes_0"), val = tensor([2])]; tensor var_10216 = squeeze(axes = var_10216_axes_0, x = e)[name = string("op_10216")]; tensor var_10217 = const()[name = string("op_10217"), val = tensor([0, 2, 1])]; tensor var_10218 = transpose(perm = var_10217, x = var_10216)[name = string("transpose_0")]; tensor output_hidden_states = add(x = hidden_states_cast_fp16, y = var_10218)[name = string("op_10220_cast_fp16")]; } -> (output_hidden_states); }