program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3500.11.1"}, {"coremlc-version", "3500.21.1"}})] { func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5243008))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5259456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6570240))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6574400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7885184))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7889344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20341248))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20380224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32832128))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")]; tensor model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32871104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45323008))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45333312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50576256))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50592704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51903488))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51907648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53218432))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53222592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65674496))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65713472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78165376))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")]; tensor model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78204352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90656256))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90666560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95909504))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95925952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97236736))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97240896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98551680))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98555840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111007744))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111046720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123498624))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")]; tensor model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123537600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135989504))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135999808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141242752))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141259200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142569984))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142574144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143884928))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143889088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156340992))))[name = string("model_model_layers_21_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156379968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168831872))))[name = string("model_model_layers_21_mlp_up_proj_weight_palettized")]; tensor model_model_layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168870848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181322752))))[name = string("model_model_layers_21_mlp_down_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181333056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186576000))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186592448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187903232))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187907392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189218176))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189222336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201674240))))[name = string("model_model_layers_22_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201713216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214165120))))[name = string("model_model_layers_22_mlp_up_proj_weight_palettized")]; tensor model_model_layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214204096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226656000))))[name = string("model_model_layers_22_mlp_down_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226666304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231909248))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231925696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233236480))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233240640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234551424))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234555584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247007488))))[name = string("model_model_layers_23_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247046464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259498368))))[name = string("model_model_layers_23_mlp_up_proj_weight_palettized")]; tensor model_model_layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259537344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271989248))))[name = string("model_model_layers_23_mlp_down_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271999552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277242496))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277258944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278569728))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278573888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279884672))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279888832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292340736))))[name = string("model_model_layers_24_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292379712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304831616))))[name = string("model_model_layers_24_mlp_up_proj_weight_palettized")]; tensor model_model_layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304870592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317322496))))[name = string("model_model_layers_24_mlp_down_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317332800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322575744))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322592192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323902976))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323907136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325217920))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325222080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337673984))))[name = string("model_model_layers_25_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337712960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350164864))))[name = string("model_model_layers_25_mlp_up_proj_weight_palettized")]; tensor model_model_layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350203840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362655744))))[name = string("model_model_layers_25_mlp_down_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362666048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367908992))))[name = string("model_model_layers_26_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367925440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369236224))))[name = string("model_model_layers_26_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369240384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370551168))))[name = string("model_model_layers_26_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370555328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383007232))))[name = string("model_model_layers_26_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383046208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395498112))))[name = string("model_model_layers_26_mlp_up_proj_weight_palettized")]; tensor model_model_layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395537088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407988992))))[name = string("model_model_layers_26_mlp_down_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407999296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413242240))))[name = string("model_model_layers_27_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413258688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414569472))))[name = string("model_model_layers_27_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414573632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415884416))))[name = string("model_model_layers_27_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_27_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415888576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428340480))))[name = string("model_model_layers_27_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_27_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428379456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440831360))))[name = string("model_model_layers_27_mlp_up_proj_weight_palettized")]; tensor model_model_layers_27_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440870336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453322240))))[name = string("model_model_layers_27_mlp_down_proj_weight_palettized")]; tensor model_model_layers_28_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453332544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458575488))))[name = string("model_model_layers_28_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_28_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458591936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459902720))))[name = string("model_model_layers_28_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_28_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459906880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461217664))))[name = string("model_model_layers_28_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_28_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461221824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473673728))))[name = string("model_model_layers_28_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_28_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473712704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486164608))))[name = string("model_model_layers_28_mlp_up_proj_weight_palettized")]; tensor model_model_layers_28_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486203584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498655488))))[name = string("model_model_layers_28_mlp_down_proj_weight_palettized")]; tensor model_model_layers_29_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498665792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503908736))))[name = string("model_model_layers_29_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_29_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503925184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505235968))))[name = string("model_model_layers_29_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_29_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505240128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506550912))))[name = string("model_model_layers_29_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_29_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506555072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519006976))))[name = string("model_model_layers_29_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_29_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519045952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531497856))))[name = string("model_model_layers_29_mlp_up_proj_weight_palettized")]; tensor model_model_layers_29_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531536832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543988736))))[name = string("model_model_layers_29_mlp_down_proj_weight_palettized")]; tensor model_model_layers_30_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543999040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549241984))))[name = string("model_model_layers_30_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_30_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549258432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550569216))))[name = string("model_model_layers_30_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_30_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550573376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551884160))))[name = string("model_model_layers_30_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_30_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551888320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564340224))))[name = string("model_model_layers_30_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_30_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564379200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576831104))))[name = string("model_model_layers_30_mlp_up_proj_weight_palettized")]; tensor model_model_layers_30_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576870080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589321984))))[name = string("model_model_layers_30_mlp_down_proj_weight_palettized")]; tensor model_model_layers_31_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589332288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594575232))))[name = string("model_model_layers_31_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_31_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594591680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595902464))))[name = string("model_model_layers_31_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_31_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595906624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597217408))))[name = string("model_model_layers_31_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_31_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597221568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609673472))))[name = string("model_model_layers_31_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_31_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609712448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622164352))))[name = string("model_model_layers_31_mlp_up_proj_weight_palettized")]; tensor model_model_layers_31_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622203328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634655232))))[name = string("model_model_layers_31_mlp_down_proj_weight_palettized")]; tensor model_model_layers_32_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634665536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639908480))))[name = string("model_model_layers_32_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_32_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639924928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641235712))))[name = string("model_model_layers_32_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_32_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641239872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642550656))))[name = string("model_model_layers_32_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_32_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642554816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655006720))))[name = string("model_model_layers_32_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_32_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655045696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667497600))))[name = string("model_model_layers_32_mlp_up_proj_weight_palettized")]; tensor model_model_layers_32_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667536576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679988480))))[name = string("model_model_layers_32_mlp_down_proj_weight_palettized")]; tensor model_model_layers_33_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685241728))))[name = string("model_model_layers_33_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_33_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685258176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686568960))))[name = string("model_model_layers_33_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_33_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686573120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687883904))))[name = string("model_model_layers_33_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_33_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687888064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700339968))))[name = string("model_model_layers_33_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_33_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700378944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712830848))))[name = string("model_model_layers_33_mlp_up_proj_weight_palettized")]; tensor model_model_layers_33_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712869824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725321728))))[name = string("model_model_layers_33_mlp_down_proj_weight_palettized")]; tensor model_model_layers_34_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725332032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730574976))))[name = string("model_model_layers_34_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_34_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730591424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731902208))))[name = string("model_model_layers_34_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_34_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731906368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(733217152))))[name = string("model_model_layers_34_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_34_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(733221312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745673216))))[name = string("model_model_layers_34_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_34_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745712192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758164096))))[name = string("model_model_layers_34_mlp_up_proj_weight_palettized")]; tensor model_model_layers_34_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758203072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770654976))))[name = string("model_model_layers_34_mlp_down_proj_weight_palettized")]; tensor model_model_layers_35_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770665280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775908224))))[name = string("model_model_layers_35_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_35_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775924672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(777235456))))[name = string("model_model_layers_35_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_35_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(777239616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778550400))))[name = string("model_model_layers_35_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_35_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778554560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791006464))))[name = string("model_model_layers_35_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_35_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791045440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803497344))))[name = string("model_model_layers_35_mlp_up_proj_weight_palettized")]; tensor model_model_layers_35_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803536320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815988224))))[name = string("model_model_layers_35_mlp_down_proj_weight_palettized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(65536)]; tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_973_axis_0 = const()[name = string("op_973_axis_0"), val = int32(1)]; int32 var_973_batch_dims_0 = const()[name = string("op_973_batch_dims_0"), val = int32(0)]; bool var_973_validate_indices_0 = const()[name = string("op_973_validate_indices_0"), val = bool(false)]; tensor var_965_to_fp16 = const()[name = string("op_965_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815998528)))]; tensor var_973_cast_fp16 = gather(axis = var_973_axis_0, batch_dims = var_973_batch_dims_0, indices = select_0, validate_indices = var_973_validate_indices_0, x = var_965_to_fp16)[name = string("op_973_cast_fp16")]; tensor var_978 = const()[name = string("op_978"), val = tensor([1, 1, 1, -1])]; tensor sin_1_cast_fp16 = reshape(shape = var_978, x = var_973_cast_fp16)[name = string("sin_1_cast_fp16")]; int32 var_988_axis_0 = const()[name = string("op_988_axis_0"), val = int32(1)]; int32 var_988_batch_dims_0 = const()[name = string("op_988_batch_dims_0"), val = int32(0)]; bool var_988_validate_indices_0 = const()[name = string("op_988_validate_indices_0"), val = bool(false)]; tensor var_980_to_fp16 = const()[name = string("op_980_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832775808)))]; tensor var_988_cast_fp16 = gather(axis = var_988_axis_0, batch_dims = var_988_batch_dims_0, indices = select_0, validate_indices = var_988_validate_indices_0, x = var_980_to_fp16)[name = string("op_988_cast_fp16")]; tensor var_993 = const()[name = string("op_993"), val = tensor([1, 1, 1, -1])]; tensor cos_1_cast_fp16 = reshape(shape = var_993, x = var_988_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_1020_axes_0 = const()[name = string("op_1020_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849553088)))]; fp16 var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1020_cast_fp16 = layer_norm(axes = var_1020_axes_0, epsilon = var_1008_to_fp16, gamma = model_model_layers_18_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1026 = const()[name = string("op_1026"), val = tensor([0, 2, 1])]; tensor var_1029_axes_0 = const()[name = string("op_1029_axes_0"), val = tensor([2])]; tensor var_1027 = transpose(perm = var_1026, x = var_1020_cast_fp16)[name = string("transpose_107")]; tensor var_1029 = expand_dims(axes = var_1029_axes_0, x = var_1027)[name = string("op_1029")]; string var_1045_pad_type_0 = const()[name = string("op_1045_pad_type_0"), val = string("valid")]; tensor var_1045_strides_0 = const()[name = string("op_1045_strides_0"), val = tensor([1, 1])]; tensor var_1045_pad_0 = const()[name = string("op_1045_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1045_dilations_0 = const()[name = string("op_1045_dilations_0"), val = tensor([1, 1])]; int32 var_1045_groups_0 = const()[name = string("op_1045_groups_0"), val = int32(1)]; tensor var_1045 = conv(dilations = var_1045_dilations_0, groups = var_1045_groups_0, pad = var_1045_pad_0, pad_type = var_1045_pad_type_0, strides = var_1045_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_1029)[name = string("op_1045")]; tensor var_1050 = const()[name = string("op_1050"), val = tensor([1, 32, 1, 128])]; tensor var_1051 = reshape(shape = var_1050, x = var_1045)[name = string("op_1051")]; string var_1067_pad_type_0 = const()[name = string("op_1067_pad_type_0"), val = string("valid")]; tensor var_1067_strides_0 = const()[name = string("op_1067_strides_0"), val = tensor([1, 1])]; tensor var_1067_pad_0 = const()[name = string("op_1067_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1067_dilations_0 = const()[name = string("op_1067_dilations_0"), val = tensor([1, 1])]; int32 var_1067_groups_0 = const()[name = string("op_1067_groups_0"), val = int32(1)]; tensor var_1067 = conv(dilations = var_1067_dilations_0, groups = var_1067_groups_0, pad = var_1067_pad_0, pad_type = var_1067_pad_type_0, strides = var_1067_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_1029)[name = string("op_1067")]; tensor var_1072 = const()[name = string("op_1072"), val = tensor([1, 8, 1, 128])]; tensor var_1073 = reshape(shape = var_1072, x = var_1067)[name = string("op_1073")]; string var_1089_pad_type_0 = const()[name = string("op_1089_pad_type_0"), val = string("valid")]; tensor var_1089_strides_0 = const()[name = string("op_1089_strides_0"), val = tensor([1, 1])]; tensor var_1089_pad_0 = const()[name = string("op_1089_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1089_dilations_0 = const()[name = string("op_1089_dilations_0"), val = tensor([1, 1])]; int32 var_1089_groups_0 = const()[name = string("op_1089_groups_0"), val = int32(1)]; tensor var_1089 = conv(dilations = var_1089_dilations_0, groups = var_1089_groups_0, pad = var_1089_pad_0, pad_type = var_1089_pad_type_0, strides = var_1089_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_1029)[name = string("op_1089")]; tensor var_1094 = const()[name = string("op_1094"), val = tensor([1, 8, 1, 128])]; tensor var_1095 = reshape(shape = var_1094, x = var_1089)[name = string("op_1095")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = var_1051)[name = string("mean_3")]; tensor input_5 = sub(x = var_1051, y = mean_3)[name = string("input_5")]; tensor var_1116_axes_0 = const()[name = string("op_1116_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558272)))]; fp16 var_1104_to_fp16 = const()[name = string("op_1104_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1116_cast_fp16 = layer_norm(axes = var_1116_axes_0, epsilon = var_1104_to_fp16, gamma = model_model_layers_18_self_attn_q_norm_weight_to_fp16, x = input_5)[name = string("op_1116_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = var_1073)[name = string("mean_5")]; tensor input_7 = sub(x = var_1073, y = mean_5)[name = string("input_7")]; tensor var_1134_axes_0 = const()[name = string("op_1134_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558592)))]; fp16 var_1122_to_fp16 = const()[name = string("op_1122_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1134_cast_fp16 = layer_norm(axes = var_1134_axes_0, epsilon = var_1122_to_fp16, gamma = model_model_layers_18_self_attn_k_norm_weight_to_fp16, x = input_7)[name = string("op_1134_cast_fp16")]; tensor var_1137 = mul(x = var_1116_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1137")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_1116_cast_fp16)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_1116_cast_fp16)[name = string("x2_1")]; fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; tensor var_1158 = mul(x = x2_1, y = const_5_promoted)[name = string("op_1158")]; int32 var_1160 = const()[name = string("op_1160"), val = int32(-1)]; bool var_1161_interleave_0 = const()[name = string("op_1161_interleave_0"), val = bool(false)]; tensor var_1161 = concat(axis = var_1160, interleave = var_1161_interleave_0, values = (var_1158, x1_1))[name = string("op_1161")]; tensor var_1162 = mul(x = var_1161, y = sin_1_cast_fp16)[name = string("op_1162")]; tensor query_states_1 = add(x = var_1137, y = var_1162)[name = string("query_states_1")]; tensor var_1165 = mul(x = var_1134_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1165")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_1134_cast_fp16)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_1134_cast_fp16)[name = string("x2_3")]; fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; tensor var_1186 = mul(x = x2_3, y = const_8_promoted)[name = string("op_1186")]; int32 var_1188 = const()[name = string("op_1188"), val = int32(-1)]; bool var_1189_interleave_0 = const()[name = string("op_1189_interleave_0"), val = bool(false)]; tensor var_1189 = concat(axis = var_1188, interleave = var_1189_interleave_0, values = (var_1186, x1_3))[name = string("op_1189")]; tensor var_1190 = mul(x = var_1189, y = sin_1_cast_fp16)[name = string("op_1190")]; tensor key_states_1 = add(x = var_1165, y = var_1190)[name = string("key_states_1")]; int32 var_1194 = const()[name = string("op_1194"), val = int32(1)]; tensor var_1195 = add(x = current_pos, y = var_1194)[name = string("op_1195")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([18])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([19])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1195, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = key_states_1, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; tensor coreml_update_state_36 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([54])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([55])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1195, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_1095, x = coreml_update_state_36)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; tensor coreml_update_state_37 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; tensor var_1245_begin_0 = const()[name = string("op_1245_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_1245_end_0 = const()[name = string("op_1245_end_0"), val = tensor([19, 8, 1024, 128])]; tensor var_1245_end_mask_0 = const()[name = string("op_1245_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1245_cast_fp16 = slice_by_index(begin = var_1245_begin_0, end = var_1245_end_0, end_mask = var_1245_end_mask_0, x = coreml_update_state_37)[name = string("op_1245_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_1245_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_1252_begin_0 = const()[name = string("op_1252_begin_0"), val = tensor([54, 0, 0, 0])]; tensor var_1252_end_0 = const()[name = string("op_1252_end_0"), val = tensor([55, 8, 1024, 128])]; tensor var_1252_end_mask_0 = const()[name = string("op_1252_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = coreml_update_state_37)[name = string("op_1252_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_1252_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_7_axes_0 = const()[name = string("x_7_axes_0"), val = tensor([1])]; tensor x_7_cast_fp16 = expand_dims(axes = x_7_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_1289 = const()[name = string("op_1289"), val = tensor([1, 4, 1, 1])]; tensor x_9_cast_fp16 = tile(reps = var_1289, x = x_7_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_1301 = const()[name = string("op_1301"), val = tensor([1, -1, 1024, 128])]; tensor key_states_3_cast_fp16 = reshape(shape = var_1301, x = x_9_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor x_13_axes_0 = const()[name = string("x_13_axes_0"), val = tensor([1])]; tensor x_13_cast_fp16 = expand_dims(axes = x_13_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_1309 = const()[name = string("op_1309"), val = tensor([1, 4, 1, 1])]; tensor x_15_cast_fp16 = tile(reps = var_1309, x = x_13_cast_fp16)[name = string("x_15_cast_fp16")]; tensor var_1321 = const()[name = string("op_1321"), val = tensor([1, -1, 1024, 128])]; tensor value_states_3_cast_fp16 = reshape(shape = var_1321, x = x_15_cast_fp16)[name = string("value_states_3_cast_fp16")]; bool var_1336_transpose_x_1 = const()[name = string("op_1336_transpose_x_1"), val = bool(false)]; bool var_1336_transpose_y_1 = const()[name = string("op_1336_transpose_y_1"), val = bool(true)]; tensor var_1336 = matmul(transpose_x = var_1336_transpose_x_1, transpose_y = var_1336_transpose_y_1, x = query_states_1, y = key_states_3_cast_fp16)[name = string("op_1336")]; fp16 var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_1336, y = var_1337_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_1372 = const()[name = string("op_1372"), val = int32(-1)]; tensor attn_weights_5_cast_fp16 = softmax(axis = var_1372, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_5_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_1383_perm_0 = const()[name = string("op_1383_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1387 = const()[name = string("op_1387"), val = tensor([1, 1, 4096])]; tensor var_1383_cast_fp16 = transpose(perm = var_1383_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_106")]; tensor attn_output_5_cast_fp16 = reshape(shape = var_1387, x = var_1383_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_1392 = const()[name = string("op_1392"), val = tensor([0, 2, 1])]; string var_1408_pad_type_0 = const()[name = string("op_1408_pad_type_0"), val = string("valid")]; int32 var_1408_groups_0 = const()[name = string("op_1408_groups_0"), val = int32(1)]; tensor var_1408_strides_0 = const()[name = string("op_1408_strides_0"), val = tensor([1])]; tensor var_1408_pad_0 = const()[name = string("op_1408_pad_0"), val = tensor([0, 0])]; tensor var_1408_dilations_0 = const()[name = string("op_1408_dilations_0"), val = tensor([1])]; tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854801856))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1393_cast_fp16 = transpose(perm = var_1392, x = attn_output_5_cast_fp16)[name = string("transpose_105")]; tensor var_1408_cast_fp16 = conv(dilations = var_1408_dilations_0, groups = var_1408_groups_0, pad = var_1408_pad_0, pad_type = var_1408_pad_type_0, strides = var_1408_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1393_cast_fp16)[name = string("op_1408_cast_fp16")]; tensor var_1412 = const()[name = string("op_1412"), val = tensor([0, 2, 1])]; tensor attn_output_9_cast_fp16 = transpose(perm = var_1412, x = var_1408_cast_fp16)[name = string("transpose_104")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_11_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_7_cast_fp16)[name = string("input_11_cast_fp16")]; tensor var_1431_axes_0 = const()[name = string("op_1431_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854812160)))]; fp16 var_1419_to_fp16 = const()[name = string("op_1419_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1431_cast_fp16 = layer_norm(axes = var_1431_axes_0, epsilon = var_1419_to_fp16, gamma = model_model_layers_18_post_attention_layernorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("op_1431_cast_fp16")]; tensor var_1445 = const()[name = string("op_1445"), val = tensor([0, 2, 1])]; tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; tensor var_1446 = transpose(perm = var_1445, x = var_1431_cast_fp16)[name = string("transpose_103")]; tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_1446)[name = string("input_13")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor input_15 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_13)[name = string("input_15")]; string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_13)[name = string("b_1")]; tensor c_1 = silu(x = input_15)[name = string("c_1")]; tensor input_17 = mul(x = c_1, y = b_1)[name = string("input_17")]; string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; tensor e_1 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_17)[name = string("e_1")]; tensor var_1468_axes_0 = const()[name = string("op_1468_axes_0"), val = tensor([2])]; tensor var_1468 = squeeze(axes = var_1468_axes_0, x = e_1)[name = string("op_1468")]; tensor var_1469 = const()[name = string("op_1469"), val = tensor([0, 2, 1])]; tensor var_1470 = transpose(perm = var_1469, x = var_1468)[name = string("transpose_102")]; tensor hidden_states_7_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_1470)[name = string("hidden_states_7_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_7_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_19_cast_fp16 = sub(x = hidden_states_7_cast_fp16, y = mean_9_cast_fp16)[name = string("input_19_cast_fp16")]; tensor var_1488_axes_0 = const()[name = string("op_1488_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854817344)))]; fp16 var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1488_cast_fp16 = layer_norm(axes = var_1488_axes_0, epsilon = var_1476_to_fp16, gamma = model_model_layers_19_input_layernorm_weight_to_fp16, x = input_19_cast_fp16)[name = string("op_1488_cast_fp16")]; tensor var_1494 = const()[name = string("op_1494"), val = tensor([0, 2, 1])]; tensor var_1497_axes_0 = const()[name = string("op_1497_axes_0"), val = tensor([2])]; tensor var_1495 = transpose(perm = var_1494, x = var_1488_cast_fp16)[name = string("transpose_101")]; tensor var_1497 = expand_dims(axes = var_1497_axes_0, x = var_1495)[name = string("op_1497")]; string var_1513_pad_type_0 = const()[name = string("op_1513_pad_type_0"), val = string("valid")]; tensor var_1513_strides_0 = const()[name = string("op_1513_strides_0"), val = tensor([1, 1])]; tensor var_1513_pad_0 = const()[name = string("op_1513_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1513_dilations_0 = const()[name = string("op_1513_dilations_0"), val = tensor([1, 1])]; int32 var_1513_groups_0 = const()[name = string("op_1513_groups_0"), val = int32(1)]; tensor var_1513 = conv(dilations = var_1513_dilations_0, groups = var_1513_groups_0, pad = var_1513_pad_0, pad_type = var_1513_pad_type_0, strides = var_1513_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_1497)[name = string("op_1513")]; tensor var_1518 = const()[name = string("op_1518"), val = tensor([1, 32, 1, 128])]; tensor var_1519 = reshape(shape = var_1518, x = var_1513)[name = string("op_1519")]; string var_1535_pad_type_0 = const()[name = string("op_1535_pad_type_0"), val = string("valid")]; tensor var_1535_strides_0 = const()[name = string("op_1535_strides_0"), val = tensor([1, 1])]; tensor var_1535_pad_0 = const()[name = string("op_1535_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1535_dilations_0 = const()[name = string("op_1535_dilations_0"), val = tensor([1, 1])]; int32 var_1535_groups_0 = const()[name = string("op_1535_groups_0"), val = int32(1)]; tensor var_1535 = conv(dilations = var_1535_dilations_0, groups = var_1535_groups_0, pad = var_1535_pad_0, pad_type = var_1535_pad_type_0, strides = var_1535_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_1497)[name = string("op_1535")]; tensor var_1540 = const()[name = string("op_1540"), val = tensor([1, 8, 1, 128])]; tensor var_1541 = reshape(shape = var_1540, x = var_1535)[name = string("op_1541")]; string var_1557_pad_type_0 = const()[name = string("op_1557_pad_type_0"), val = string("valid")]; tensor var_1557_strides_0 = const()[name = string("op_1557_strides_0"), val = tensor([1, 1])]; tensor var_1557_pad_0 = const()[name = string("op_1557_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1557_dilations_0 = const()[name = string("op_1557_dilations_0"), val = tensor([1, 1])]; int32 var_1557_groups_0 = const()[name = string("op_1557_groups_0"), val = int32(1)]; tensor var_1557 = conv(dilations = var_1557_dilations_0, groups = var_1557_groups_0, pad = var_1557_pad_0, pad_type = var_1557_pad_type_0, strides = var_1557_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_1497)[name = string("op_1557")]; tensor var_1562 = const()[name = string("op_1562"), val = tensor([1, 8, 1, 128])]; tensor var_1563 = reshape(shape = var_1562, x = var_1557)[name = string("op_1563")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = var_1519)[name = string("mean_11")]; tensor input_23 = sub(x = var_1519, y = mean_11)[name = string("input_23")]; tensor var_1584_axes_0 = const()[name = string("op_1584_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854822528)))]; fp16 var_1572_to_fp16 = const()[name = string("op_1572_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1584_cast_fp16 = layer_norm(axes = var_1584_axes_0, epsilon = var_1572_to_fp16, gamma = model_model_layers_19_self_attn_q_norm_weight_to_fp16, x = input_23)[name = string("op_1584_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = var_1541)[name = string("mean_13")]; tensor input_25 = sub(x = var_1541, y = mean_13)[name = string("input_25")]; tensor var_1602_axes_0 = const()[name = string("op_1602_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854822848)))]; fp16 var_1590_to_fp16 = const()[name = string("op_1590_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1602_cast_fp16 = layer_norm(axes = var_1602_axes_0, epsilon = var_1590_to_fp16, gamma = model_model_layers_19_self_attn_k_norm_weight_to_fp16, x = input_25)[name = string("op_1602_cast_fp16")]; tensor var_1605 = mul(x = var_1584_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1605")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_1584_cast_fp16)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_1584_cast_fp16)[name = string("x2_5")]; fp16 const_23_promoted = const()[name = string("const_23_promoted"), val = fp16(-0x1p+0)]; tensor var_1626 = mul(x = x2_5, y = const_23_promoted)[name = string("op_1626")]; int32 var_1628 = const()[name = string("op_1628"), val = int32(-1)]; bool var_1629_interleave_0 = const()[name = string("op_1629_interleave_0"), val = bool(false)]; tensor var_1629 = concat(axis = var_1628, interleave = var_1629_interleave_0, values = (var_1626, x1_5))[name = string("op_1629")]; tensor var_1630 = mul(x = var_1629, y = sin_1_cast_fp16)[name = string("op_1630")]; tensor query_states_5 = add(x = var_1605, y = var_1630)[name = string("query_states_5")]; tensor var_1633 = mul(x = var_1602_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1633")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_1602_cast_fp16)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_1602_cast_fp16)[name = string("x2_7")]; fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; tensor var_1654 = mul(x = x2_7, y = const_26_promoted)[name = string("op_1654")]; int32 var_1656 = const()[name = string("op_1656"), val = int32(-1)]; bool var_1657_interleave_0 = const()[name = string("op_1657_interleave_0"), val = bool(false)]; tensor var_1657 = concat(axis = var_1656, interleave = var_1657_interleave_0, values = (var_1654, x1_7))[name = string("op_1657")]; tensor var_1658 = mul(x = var_1657, y = sin_1_cast_fp16)[name = string("op_1658")]; tensor key_states_5 = add(x = var_1633, y = var_1658)[name = string("key_states_5")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([19])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([20])]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_1195, concat_11_values3_0))[name = string("concat_11")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = key_states_5, x = coreml_update_state_37)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; tensor coreml_update_state_38 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([55])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([56])]; int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_1195, concat_15_values3_0))[name = string("concat_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_1563, x = coreml_update_state_38)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; tensor coreml_update_state_39 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; tensor var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_1713_end_0 = const()[name = string("op_1713_end_0"), val = tensor([20, 8, 1024, 128])]; tensor var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = var_1713_end_0, end_mask = var_1713_end_mask_0, x = coreml_update_state_39)[name = string("op_1713_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_1713_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_1720_begin_0 = const()[name = string("op_1720_begin_0"), val = tensor([55, 0, 0, 0])]; tensor var_1720_end_0 = const()[name = string("op_1720_end_0"), val = tensor([56, 8, 1024, 128])]; tensor var_1720_end_mask_0 = const()[name = string("op_1720_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1720_cast_fp16 = slice_by_index(begin = var_1720_begin_0, end = var_1720_end_0, end_mask = var_1720_end_mask_0, x = coreml_update_state_39)[name = string("op_1720_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_1720_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_27_axes_0 = const()[name = string("x_27_axes_0"), val = tensor([1])]; tensor x_27_cast_fp16 = expand_dims(axes = x_27_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_1757 = const()[name = string("op_1757"), val = tensor([1, 4, 1, 1])]; tensor x_29_cast_fp16 = tile(reps = var_1757, x = x_27_cast_fp16)[name = string("x_29_cast_fp16")]; tensor var_1769 = const()[name = string("op_1769"), val = tensor([1, -1, 1024, 128])]; tensor key_states_7_cast_fp16 = reshape(shape = var_1769, x = x_29_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_33_axes_0 = const()[name = string("x_33_axes_0"), val = tensor([1])]; tensor x_33_cast_fp16 = expand_dims(axes = x_33_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_1777 = const()[name = string("op_1777"), val = tensor([1, 4, 1, 1])]; tensor x_35_cast_fp16 = tile(reps = var_1777, x = x_33_cast_fp16)[name = string("x_35_cast_fp16")]; tensor var_1789 = const()[name = string("op_1789"), val = tensor([1, -1, 1024, 128])]; tensor value_states_9_cast_fp16 = reshape(shape = var_1789, x = x_35_cast_fp16)[name = string("value_states_9_cast_fp16")]; bool var_1804_transpose_x_1 = const()[name = string("op_1804_transpose_x_1"), val = bool(false)]; bool var_1804_transpose_y_1 = const()[name = string("op_1804_transpose_y_1"), val = bool(true)]; tensor var_1804 = matmul(transpose_x = var_1804_transpose_x_1, transpose_y = var_1804_transpose_y_1, x = query_states_5, y = key_states_7_cast_fp16)[name = string("op_1804")]; fp16 var_1805_to_fp16 = const()[name = string("op_1805_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_7_cast_fp16 = mul(x = var_1804, y = var_1805_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("attn_weights_9_cast_fp16")]; int32 var_1840 = const()[name = string("op_1840"), val = int32(-1)]; tensor attn_weights_11_cast_fp16 = softmax(axis = var_1840, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; bool attn_output_11_transpose_x_0 = const()[name = string("attn_output_11_transpose_x_0"), val = bool(false)]; bool attn_output_11_transpose_y_0 = const()[name = string("attn_output_11_transpose_y_0"), val = bool(false)]; tensor attn_output_11_cast_fp16 = matmul(transpose_x = attn_output_11_transpose_x_0, transpose_y = attn_output_11_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_9_cast_fp16)[name = string("attn_output_11_cast_fp16")]; tensor var_1851_perm_0 = const()[name = string("op_1851_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1855 = const()[name = string("op_1855"), val = tensor([1, 1, 4096])]; tensor var_1851_cast_fp16 = transpose(perm = var_1851_perm_0, x = attn_output_11_cast_fp16)[name = string("transpose_100")]; tensor attn_output_15_cast_fp16 = reshape(shape = var_1855, x = var_1851_cast_fp16)[name = string("attn_output_15_cast_fp16")]; tensor var_1860 = const()[name = string("op_1860"), val = tensor([0, 2, 1])]; string var_1876_pad_type_0 = const()[name = string("op_1876_pad_type_0"), val = string("valid")]; int32 var_1876_groups_0 = const()[name = string("op_1876_groups_0"), val = int32(1)]; tensor var_1876_strides_0 = const()[name = string("op_1876_strides_0"), val = tensor([1])]; tensor var_1876_pad_0 = const()[name = string("op_1876_pad_0"), val = tensor([0, 0])]; tensor var_1876_dilations_0 = const()[name = string("op_1876_dilations_0"), val = tensor([1])]; tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854823168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860066112))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1861_cast_fp16 = transpose(perm = var_1860, x = attn_output_15_cast_fp16)[name = string("transpose_99")]; tensor var_1876_cast_fp16 = conv(dilations = var_1876_dilations_0, groups = var_1876_groups_0, pad = var_1876_pad_0, pad_type = var_1876_pad_type_0, strides = var_1876_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1861_cast_fp16)[name = string("op_1876_cast_fp16")]; tensor var_1880 = const()[name = string("op_1880"), val = tensor([0, 2, 1])]; tensor attn_output_19_cast_fp16 = transpose(perm = var_1880, x = var_1876_cast_fp16)[name = string("transpose_98")]; tensor hidden_states_11_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_11_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_11_cast_fp16, y = mean_15_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_1899_axes_0 = const()[name = string("op_1899_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860076416)))]; fp16 var_1887_to_fp16 = const()[name = string("op_1887_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1899_cast_fp16 = layer_norm(axes = var_1899_axes_0, epsilon = var_1887_to_fp16, gamma = model_model_layers_19_post_attention_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_1899_cast_fp16")]; tensor var_1913 = const()[name = string("op_1913"), val = tensor([0, 2, 1])]; tensor input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor([2])]; tensor var_1914 = transpose(perm = var_1913, x = var_1899_cast_fp16)[name = string("transpose_97")]; tensor input_31 = expand_dims(axes = input_31_axes_0, x = var_1914)[name = string("input_31")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor input_33 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_31)[name = string("input_33")]; string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_31)[name = string("b_3")]; tensor c_3 = silu(x = input_33)[name = string("c_3")]; tensor input_35 = mul(x = c_3, y = b_3)[name = string("input_35")]; string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; tensor e_3 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_35)[name = string("e_3")]; tensor var_1936_axes_0 = const()[name = string("op_1936_axes_0"), val = tensor([2])]; tensor var_1936 = squeeze(axes = var_1936_axes_0, x = e_3)[name = string("op_1936")]; tensor var_1937 = const()[name = string("op_1937"), val = tensor([0, 2, 1])]; tensor var_1938 = transpose(perm = var_1937, x = var_1936)[name = string("transpose_96")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = var_1938)[name = string("hidden_states_13_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_37_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_17_cast_fp16)[name = string("input_37_cast_fp16")]; tensor var_1956_axes_0 = const()[name = string("op_1956_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860081600)))]; fp16 var_1944_to_fp16 = const()[name = string("op_1944_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1956_cast_fp16 = layer_norm(axes = var_1956_axes_0, epsilon = var_1944_to_fp16, gamma = model_model_layers_20_input_layernorm_weight_to_fp16, x = input_37_cast_fp16)[name = string("op_1956_cast_fp16")]; tensor var_1962 = const()[name = string("op_1962"), val = tensor([0, 2, 1])]; tensor var_1965_axes_0 = const()[name = string("op_1965_axes_0"), val = tensor([2])]; tensor var_1963 = transpose(perm = var_1962, x = var_1956_cast_fp16)[name = string("transpose_95")]; tensor var_1965 = expand_dims(axes = var_1965_axes_0, x = var_1963)[name = string("op_1965")]; string var_1981_pad_type_0 = const()[name = string("op_1981_pad_type_0"), val = string("valid")]; tensor var_1981_strides_0 = const()[name = string("op_1981_strides_0"), val = tensor([1, 1])]; tensor var_1981_pad_0 = const()[name = string("op_1981_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1981_dilations_0 = const()[name = string("op_1981_dilations_0"), val = tensor([1, 1])]; int32 var_1981_groups_0 = const()[name = string("op_1981_groups_0"), val = int32(1)]; tensor var_1981 = conv(dilations = var_1981_dilations_0, groups = var_1981_groups_0, pad = var_1981_pad_0, pad_type = var_1981_pad_type_0, strides = var_1981_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_1965)[name = string("op_1981")]; tensor var_1986 = const()[name = string("op_1986"), val = tensor([1, 32, 1, 128])]; tensor var_1987 = reshape(shape = var_1986, x = var_1981)[name = string("op_1987")]; string var_2003_pad_type_0 = const()[name = string("op_2003_pad_type_0"), val = string("valid")]; tensor var_2003_strides_0 = const()[name = string("op_2003_strides_0"), val = tensor([1, 1])]; tensor var_2003_pad_0 = const()[name = string("op_2003_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2003_dilations_0 = const()[name = string("op_2003_dilations_0"), val = tensor([1, 1])]; int32 var_2003_groups_0 = const()[name = string("op_2003_groups_0"), val = int32(1)]; tensor var_2003 = conv(dilations = var_2003_dilations_0, groups = var_2003_groups_0, pad = var_2003_pad_0, pad_type = var_2003_pad_type_0, strides = var_2003_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_1965)[name = string("op_2003")]; tensor var_2008 = const()[name = string("op_2008"), val = tensor([1, 8, 1, 128])]; tensor var_2009 = reshape(shape = var_2008, x = var_2003)[name = string("op_2009")]; string var_2025_pad_type_0 = const()[name = string("op_2025_pad_type_0"), val = string("valid")]; tensor var_2025_strides_0 = const()[name = string("op_2025_strides_0"), val = tensor([1, 1])]; tensor var_2025_pad_0 = const()[name = string("op_2025_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2025_dilations_0 = const()[name = string("op_2025_dilations_0"), val = tensor([1, 1])]; int32 var_2025_groups_0 = const()[name = string("op_2025_groups_0"), val = int32(1)]; tensor var_2025 = conv(dilations = var_2025_dilations_0, groups = var_2025_groups_0, pad = var_2025_pad_0, pad_type = var_2025_pad_type_0, strides = var_2025_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_1965)[name = string("op_2025")]; tensor var_2030 = const()[name = string("op_2030"), val = tensor([1, 8, 1, 128])]; tensor var_2031 = reshape(shape = var_2030, x = var_2025)[name = string("op_2031")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = var_1987)[name = string("mean_19")]; tensor input_41 = sub(x = var_1987, y = mean_19)[name = string("input_41")]; tensor var_2052_axes_0 = const()[name = string("op_2052_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860086784)))]; fp16 var_2040_to_fp16 = const()[name = string("op_2040_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2052_cast_fp16 = layer_norm(axes = var_2052_axes_0, epsilon = var_2040_to_fp16, gamma = model_model_layers_20_self_attn_q_norm_weight_to_fp16, x = input_41)[name = string("op_2052_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = var_2009)[name = string("mean_21")]; tensor input_43 = sub(x = var_2009, y = mean_21)[name = string("input_43")]; tensor var_2070_axes_0 = const()[name = string("op_2070_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860087104)))]; fp16 var_2058_to_fp16 = const()[name = string("op_2058_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2070_cast_fp16 = layer_norm(axes = var_2070_axes_0, epsilon = var_2058_to_fp16, gamma = model_model_layers_20_self_attn_k_norm_weight_to_fp16, x = input_43)[name = string("op_2070_cast_fp16")]; tensor var_2073 = mul(x = var_2052_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2073")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_2052_cast_fp16)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_2052_cast_fp16)[name = string("x2_9")]; fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; tensor var_2094 = mul(x = x2_9, y = const_41_promoted)[name = string("op_2094")]; int32 var_2096 = const()[name = string("op_2096"), val = int32(-1)]; bool var_2097_interleave_0 = const()[name = string("op_2097_interleave_0"), val = bool(false)]; tensor var_2097 = concat(axis = var_2096, interleave = var_2097_interleave_0, values = (var_2094, x1_9))[name = string("op_2097")]; tensor var_2098 = mul(x = var_2097, y = sin_1_cast_fp16)[name = string("op_2098")]; tensor query_states_9 = add(x = var_2073, y = var_2098)[name = string("query_states_9")]; tensor var_2101 = mul(x = var_2070_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2101")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_2070_cast_fp16)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_2070_cast_fp16)[name = string("x2_11")]; fp16 const_44_promoted = const()[name = string("const_44_promoted"), val = fp16(-0x1p+0)]; tensor var_2122 = mul(x = x2_11, y = const_44_promoted)[name = string("op_2122")]; int32 var_2124 = const()[name = string("op_2124"), val = int32(-1)]; bool var_2125_interleave_0 = const()[name = string("op_2125_interleave_0"), val = bool(false)]; tensor var_2125 = concat(axis = var_2124, interleave = var_2125_interleave_0, values = (var_2122, x1_11))[name = string("op_2125")]; tensor var_2126 = mul(x = var_2125, y = sin_1_cast_fp16)[name = string("op_2126")]; tensor key_states_9 = add(x = var_2101, y = var_2126)[name = string("key_states_9")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([20])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([21])]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_1195, concat_19_values3_0))[name = string("concat_19")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = key_states_9, x = coreml_update_state_39)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; tensor coreml_update_state_40 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([56])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([57])]; int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_1195, concat_23_values3_0))[name = string("concat_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_2031, x = coreml_update_state_40)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; tensor coreml_update_state_41 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; tensor var_2181_begin_0 = const()[name = string("op_2181_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_2181_end_0 = const()[name = string("op_2181_end_0"), val = tensor([21, 8, 1024, 128])]; tensor var_2181_end_mask_0 = const()[name = string("op_2181_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2181_cast_fp16 = slice_by_index(begin = var_2181_begin_0, end = var_2181_end_0, end_mask = var_2181_end_mask_0, x = coreml_update_state_41)[name = string("op_2181_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_2181_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_2188_begin_0 = const()[name = string("op_2188_begin_0"), val = tensor([56, 0, 0, 0])]; tensor var_2188_end_0 = const()[name = string("op_2188_end_0"), val = tensor([57, 8, 1024, 128])]; tensor var_2188_end_mask_0 = const()[name = string("op_2188_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = coreml_update_state_41)[name = string("op_2188_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_2188_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_47_axes_0 = const()[name = string("x_47_axes_0"), val = tensor([1])]; tensor x_47_cast_fp16 = expand_dims(axes = x_47_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_2225 = const()[name = string("op_2225"), val = tensor([1, 4, 1, 1])]; tensor x_49_cast_fp16 = tile(reps = var_2225, x = x_47_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_2237 = const()[name = string("op_2237"), val = tensor([1, -1, 1024, 128])]; tensor key_states_11_cast_fp16 = reshape(shape = var_2237, x = x_49_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor x_53_axes_0 = const()[name = string("x_53_axes_0"), val = tensor([1])]; tensor x_53_cast_fp16 = expand_dims(axes = x_53_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_53_cast_fp16")]; tensor var_2245 = const()[name = string("op_2245"), val = tensor([1, 4, 1, 1])]; tensor x_55_cast_fp16 = tile(reps = var_2245, x = x_53_cast_fp16)[name = string("x_55_cast_fp16")]; tensor var_2257 = const()[name = string("op_2257"), val = tensor([1, -1, 1024, 128])]; tensor value_states_15_cast_fp16 = reshape(shape = var_2257, x = x_55_cast_fp16)[name = string("value_states_15_cast_fp16")]; bool var_2272_transpose_x_1 = const()[name = string("op_2272_transpose_x_1"), val = bool(false)]; bool var_2272_transpose_y_1 = const()[name = string("op_2272_transpose_y_1"), val = bool(true)]; tensor var_2272 = matmul(transpose_x = var_2272_transpose_x_1, transpose_y = var_2272_transpose_y_1, x = query_states_9, y = key_states_11_cast_fp16)[name = string("op_2272")]; fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_2272, y = var_2273_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_2308 = const()[name = string("op_2308"), val = int32(-1)]; tensor attn_weights_17_cast_fp16 = softmax(axis = var_2308, x = attn_weights_15_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = attn_weights_17_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_21_cast_fp16")]; tensor var_2319_perm_0 = const()[name = string("op_2319_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2323 = const()[name = string("op_2323"), val = tensor([1, 1, 4096])]; tensor var_2319_cast_fp16 = transpose(perm = var_2319_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_94")]; tensor attn_output_25_cast_fp16 = reshape(shape = var_2323, x = var_2319_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_2328 = const()[name = string("op_2328"), val = tensor([0, 2, 1])]; string var_2344_pad_type_0 = const()[name = string("op_2344_pad_type_0"), val = string("valid")]; int32 var_2344_groups_0 = const()[name = string("op_2344_groups_0"), val = int32(1)]; tensor var_2344_strides_0 = const()[name = string("op_2344_strides_0"), val = tensor([1])]; tensor var_2344_pad_0 = const()[name = string("op_2344_pad_0"), val = tensor([0, 0])]; tensor var_2344_dilations_0 = const()[name = string("op_2344_dilations_0"), val = tensor([1])]; tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860087424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865330368))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2329_cast_fp16 = transpose(perm = var_2328, x = attn_output_25_cast_fp16)[name = string("transpose_93")]; tensor var_2344_cast_fp16 = conv(dilations = var_2344_dilations_0, groups = var_2344_groups_0, pad = var_2344_pad_0, pad_type = var_2344_pad_type_0, strides = var_2344_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2329_cast_fp16)[name = string("op_2344_cast_fp16")]; tensor var_2348 = const()[name = string("op_2348"), val = tensor([0, 2, 1])]; tensor attn_output_29_cast_fp16 = transpose(perm = var_2348, x = var_2344_cast_fp16)[name = string("transpose_92")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_47_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_23_cast_fp16)[name = string("input_47_cast_fp16")]; tensor var_2367_axes_0 = const()[name = string("op_2367_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865340672)))]; fp16 var_2355_to_fp16 = const()[name = string("op_2355_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2367_cast_fp16 = layer_norm(axes = var_2367_axes_0, epsilon = var_2355_to_fp16, gamma = model_model_layers_20_post_attention_layernorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("op_2367_cast_fp16")]; tensor var_2381 = const()[name = string("op_2381"), val = tensor([0, 2, 1])]; tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; tensor var_2382 = transpose(perm = var_2381, x = var_2367_cast_fp16)[name = string("transpose_91")]; tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_2382)[name = string("input_49")]; string input_51_pad_type_0 = const()[name = string("input_51_pad_type_0"), val = string("valid")]; tensor input_51_strides_0 = const()[name = string("input_51_strides_0"), val = tensor([1, 1])]; tensor input_51_pad_0 = const()[name = string("input_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_51_dilations_0 = const()[name = string("input_51_dilations_0"), val = tensor([1, 1])]; int32 input_51_groups_0 = const()[name = string("input_51_groups_0"), val = int32(1)]; tensor input_51 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_49)[name = string("input_51")]; string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_49)[name = string("b_5")]; tensor c_5 = silu(x = input_51)[name = string("c_5")]; tensor input_53 = mul(x = c_5, y = b_5)[name = string("input_53")]; string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; tensor e_5 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input_53)[name = string("e_5")]; tensor var_2404_axes_0 = const()[name = string("op_2404_axes_0"), val = tensor([2])]; tensor var_2404 = squeeze(axes = var_2404_axes_0, x = e_5)[name = string("op_2404")]; tensor var_2405 = const()[name = string("op_2405"), val = tensor([0, 2, 1])]; tensor var_2406 = transpose(perm = var_2405, x = var_2404)[name = string("transpose_90")]; tensor hidden_states_19_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = var_2406)[name = string("hidden_states_19_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_19_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_55_cast_fp16 = sub(x = hidden_states_19_cast_fp16, y = mean_25_cast_fp16)[name = string("input_55_cast_fp16")]; tensor var_2424_axes_0 = const()[name = string("op_2424_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865345856)))]; fp16 var_2412_to_fp16 = const()[name = string("op_2412_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2424_cast_fp16 = layer_norm(axes = var_2424_axes_0, epsilon = var_2412_to_fp16, gamma = model_model_layers_21_input_layernorm_weight_to_fp16, x = input_55_cast_fp16)[name = string("op_2424_cast_fp16")]; tensor var_2430 = const()[name = string("op_2430"), val = tensor([0, 2, 1])]; tensor var_2433_axes_0 = const()[name = string("op_2433_axes_0"), val = tensor([2])]; tensor var_2431 = transpose(perm = var_2430, x = var_2424_cast_fp16)[name = string("transpose_89")]; tensor var_2433 = expand_dims(axes = var_2433_axes_0, x = var_2431)[name = string("op_2433")]; string var_2449_pad_type_0 = const()[name = string("op_2449_pad_type_0"), val = string("valid")]; tensor var_2449_strides_0 = const()[name = string("op_2449_strides_0"), val = tensor([1, 1])]; tensor var_2449_pad_0 = const()[name = string("op_2449_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2449_dilations_0 = const()[name = string("op_2449_dilations_0"), val = tensor([1, 1])]; int32 var_2449_groups_0 = const()[name = string("op_2449_groups_0"), val = int32(1)]; tensor var_2449 = conv(dilations = var_2449_dilations_0, groups = var_2449_groups_0, pad = var_2449_pad_0, pad_type = var_2449_pad_type_0, strides = var_2449_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_2433)[name = string("op_2449")]; tensor var_2454 = const()[name = string("op_2454"), val = tensor([1, 32, 1, 128])]; tensor var_2455 = reshape(shape = var_2454, x = var_2449)[name = string("op_2455")]; string var_2471_pad_type_0 = const()[name = string("op_2471_pad_type_0"), val = string("valid")]; tensor var_2471_strides_0 = const()[name = string("op_2471_strides_0"), val = tensor([1, 1])]; tensor var_2471_pad_0 = const()[name = string("op_2471_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2471_dilations_0 = const()[name = string("op_2471_dilations_0"), val = tensor([1, 1])]; int32 var_2471_groups_0 = const()[name = string("op_2471_groups_0"), val = int32(1)]; tensor var_2471 = conv(dilations = var_2471_dilations_0, groups = var_2471_groups_0, pad = var_2471_pad_0, pad_type = var_2471_pad_type_0, strides = var_2471_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_2433)[name = string("op_2471")]; tensor var_2476 = const()[name = string("op_2476"), val = tensor([1, 8, 1, 128])]; tensor var_2477 = reshape(shape = var_2476, x = var_2471)[name = string("op_2477")]; string var_2493_pad_type_0 = const()[name = string("op_2493_pad_type_0"), val = string("valid")]; tensor var_2493_strides_0 = const()[name = string("op_2493_strides_0"), val = tensor([1, 1])]; tensor var_2493_pad_0 = const()[name = string("op_2493_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2493_dilations_0 = const()[name = string("op_2493_dilations_0"), val = tensor([1, 1])]; int32 var_2493_groups_0 = const()[name = string("op_2493_groups_0"), val = int32(1)]; tensor var_2493 = conv(dilations = var_2493_dilations_0, groups = var_2493_groups_0, pad = var_2493_pad_0, pad_type = var_2493_pad_type_0, strides = var_2493_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_2433)[name = string("op_2493")]; tensor var_2498 = const()[name = string("op_2498"), val = tensor([1, 8, 1, 128])]; tensor var_2499 = reshape(shape = var_2498, x = var_2493)[name = string("op_2499")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = var_2455)[name = string("mean_27")]; tensor input_59 = sub(x = var_2455, y = mean_27)[name = string("input_59")]; tensor var_2520_axes_0 = const()[name = string("op_2520_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351040)))]; fp16 var_2508_to_fp16 = const()[name = string("op_2508_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2520_cast_fp16 = layer_norm(axes = var_2520_axes_0, epsilon = var_2508_to_fp16, gamma = model_model_layers_21_self_attn_q_norm_weight_to_fp16, x = input_59)[name = string("op_2520_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor mean_29 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = var_2477)[name = string("mean_29")]; tensor input_61 = sub(x = var_2477, y = mean_29)[name = string("input_61")]; tensor var_2538_axes_0 = const()[name = string("op_2538_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351360)))]; fp16 var_2526_to_fp16 = const()[name = string("op_2526_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2538_cast_fp16 = layer_norm(axes = var_2538_axes_0, epsilon = var_2526_to_fp16, gamma = model_model_layers_21_self_attn_k_norm_weight_to_fp16, x = input_61)[name = string("op_2538_cast_fp16")]; tensor var_2541 = mul(x = var_2520_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2541")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_2520_cast_fp16)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_2520_cast_fp16)[name = string("x2_13")]; fp16 const_59_promoted = const()[name = string("const_59_promoted"), val = fp16(-0x1p+0)]; tensor var_2562 = mul(x = x2_13, y = const_59_promoted)[name = string("op_2562")]; int32 var_2564 = const()[name = string("op_2564"), val = int32(-1)]; bool var_2565_interleave_0 = const()[name = string("op_2565_interleave_0"), val = bool(false)]; tensor var_2565 = concat(axis = var_2564, interleave = var_2565_interleave_0, values = (var_2562, x1_13))[name = string("op_2565")]; tensor var_2566 = mul(x = var_2565, y = sin_1_cast_fp16)[name = string("op_2566")]; tensor query_states_13 = add(x = var_2541, y = var_2566)[name = string("query_states_13")]; tensor var_2569 = mul(x = var_2538_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2569")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_2538_cast_fp16)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_2538_cast_fp16)[name = string("x2_15")]; fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; tensor var_2590 = mul(x = x2_15, y = const_62_promoted)[name = string("op_2590")]; int32 var_2592 = const()[name = string("op_2592"), val = int32(-1)]; bool var_2593_interleave_0 = const()[name = string("op_2593_interleave_0"), val = bool(false)]; tensor var_2593 = concat(axis = var_2592, interleave = var_2593_interleave_0, values = (var_2590, x1_15))[name = string("op_2593")]; tensor var_2594 = mul(x = var_2593, y = sin_1_cast_fp16)[name = string("op_2594")]; tensor key_states_13 = add(x = var_2569, y = var_2594)[name = string("key_states_13")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([21])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([22])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_1195, concat_27_values3_0))[name = string("concat_27")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = key_states_13, x = coreml_update_state_41)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; tensor coreml_update_state_42 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([57])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([58])]; int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_1195, concat_31_values3_0))[name = string("concat_31")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_2499, x = coreml_update_state_42)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; tensor coreml_update_state_43 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; tensor var_2649_begin_0 = const()[name = string("op_2649_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_2649_end_0 = const()[name = string("op_2649_end_0"), val = tensor([22, 8, 1024, 128])]; tensor var_2649_end_mask_0 = const()[name = string("op_2649_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2649_cast_fp16 = slice_by_index(begin = var_2649_begin_0, end = var_2649_end_0, end_mask = var_2649_end_mask_0, x = coreml_update_state_43)[name = string("op_2649_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_2649_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_2656_begin_0 = const()[name = string("op_2656_begin_0"), val = tensor([57, 0, 0, 0])]; tensor var_2656_end_0 = const()[name = string("op_2656_end_0"), val = tensor([58, 8, 1024, 128])]; tensor var_2656_end_mask_0 = const()[name = string("op_2656_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2656_cast_fp16 = slice_by_index(begin = var_2656_begin_0, end = var_2656_end_0, end_mask = var_2656_end_mask_0, x = coreml_update_state_43)[name = string("op_2656_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_2656_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_2693 = const()[name = string("op_2693"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_2693, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_2705 = const()[name = string("op_2705"), val = tensor([1, -1, 1024, 128])]; tensor key_states_15_cast_fp16 = reshape(shape = var_2705, x = x_69_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_2713 = const()[name = string("op_2713"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_2713, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; tensor var_2725 = const()[name = string("op_2725"), val = tensor([1, -1, 1024, 128])]; tensor value_states_21_cast_fp16 = reshape(shape = var_2725, x = x_75_cast_fp16)[name = string("value_states_21_cast_fp16")]; bool var_2740_transpose_x_1 = const()[name = string("op_2740_transpose_x_1"), val = bool(false)]; bool var_2740_transpose_y_1 = const()[name = string("op_2740_transpose_y_1"), val = bool(true)]; tensor var_2740 = matmul(transpose_x = var_2740_transpose_x_1, transpose_y = var_2740_transpose_y_1, x = query_states_13, y = key_states_15_cast_fp16)[name = string("op_2740")]; fp16 var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_19_cast_fp16 = mul(x = var_2740, y = var_2741_to_fp16)[name = string("attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = causal_mask)[name = string("attn_weights_21_cast_fp16")]; int32 var_2776 = const()[name = string("op_2776"), val = int32(-1)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_2776, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_21_cast_fp16)[name = string("attn_output_31_cast_fp16")]; tensor var_2787_perm_0 = const()[name = string("op_2787_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2791 = const()[name = string("op_2791"), val = tensor([1, 1, 4096])]; tensor var_2787_cast_fp16 = transpose(perm = var_2787_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_88")]; tensor attn_output_35_cast_fp16 = reshape(shape = var_2791, x = var_2787_cast_fp16)[name = string("attn_output_35_cast_fp16")]; tensor var_2796 = const()[name = string("op_2796"), val = tensor([0, 2, 1])]; string var_2812_pad_type_0 = const()[name = string("op_2812_pad_type_0"), val = string("valid")]; int32 var_2812_groups_0 = const()[name = string("op_2812_groups_0"), val = int32(1)]; tensor var_2812_strides_0 = const()[name = string("op_2812_strides_0"), val = tensor([1])]; tensor var_2812_pad_0 = const()[name = string("op_2812_pad_0"), val = tensor([0, 0])]; tensor var_2812_dilations_0 = const()[name = string("op_2812_dilations_0"), val = tensor([1])]; tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870594624))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2797_cast_fp16 = transpose(perm = var_2796, x = attn_output_35_cast_fp16)[name = string("transpose_87")]; tensor var_2812_cast_fp16 = conv(dilations = var_2812_dilations_0, groups = var_2812_groups_0, pad = var_2812_pad_0, pad_type = var_2812_pad_type_0, strides = var_2812_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_2797_cast_fp16)[name = string("op_2812_cast_fp16")]; tensor var_2816 = const()[name = string("op_2816"), val = tensor([0, 2, 1])]; tensor attn_output_39_cast_fp16 = transpose(perm = var_2816, x = var_2812_cast_fp16)[name = string("transpose_86")]; tensor hidden_states_23_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_23_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_65_cast_fp16 = sub(x = hidden_states_23_cast_fp16, y = mean_31_cast_fp16)[name = string("input_65_cast_fp16")]; tensor var_2835_axes_0 = const()[name = string("op_2835_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870604928)))]; fp16 var_2823_to_fp16 = const()[name = string("op_2823_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2835_cast_fp16 = layer_norm(axes = var_2835_axes_0, epsilon = var_2823_to_fp16, gamma = model_model_layers_21_post_attention_layernorm_weight_to_fp16, x = input_65_cast_fp16)[name = string("op_2835_cast_fp16")]; tensor var_2849 = const()[name = string("op_2849"), val = tensor([0, 2, 1])]; tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; tensor var_2850 = transpose(perm = var_2849, x = var_2835_cast_fp16)[name = string("transpose_85")]; tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_2850)[name = string("input_67")]; string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; tensor input_69 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_palettized, x = input_67)[name = string("input_69")]; string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_palettized, x = input_67)[name = string("b_7")]; tensor c_7 = silu(x = input_69)[name = string("c_7")]; tensor input_71 = mul(x = c_7, y = b_7)[name = string("input_71")]; string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; tensor e_7 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_palettized, x = input_71)[name = string("e_7")]; tensor var_2872_axes_0 = const()[name = string("op_2872_axes_0"), val = tensor([2])]; tensor var_2872 = squeeze(axes = var_2872_axes_0, x = e_7)[name = string("op_2872")]; tensor var_2873 = const()[name = string("op_2873"), val = tensor([0, 2, 1])]; tensor var_2874 = transpose(perm = var_2873, x = var_2872)[name = string("transpose_84")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_23_cast_fp16, y = var_2874)[name = string("hidden_states_25_cast_fp16")]; tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_33_cast_fp16")]; tensor input_73_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_33_cast_fp16)[name = string("input_73_cast_fp16")]; tensor var_2892_axes_0 = const()[name = string("op_2892_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870610112)))]; fp16 var_2880_to_fp16 = const()[name = string("op_2880_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2892_cast_fp16 = layer_norm(axes = var_2892_axes_0, epsilon = var_2880_to_fp16, gamma = model_model_layers_22_input_layernorm_weight_to_fp16, x = input_73_cast_fp16)[name = string("op_2892_cast_fp16")]; tensor var_2898 = const()[name = string("op_2898"), val = tensor([0, 2, 1])]; tensor var_2901_axes_0 = const()[name = string("op_2901_axes_0"), val = tensor([2])]; tensor var_2899 = transpose(perm = var_2898, x = var_2892_cast_fp16)[name = string("transpose_83")]; tensor var_2901 = expand_dims(axes = var_2901_axes_0, x = var_2899)[name = string("op_2901")]; string var_2917_pad_type_0 = const()[name = string("op_2917_pad_type_0"), val = string("valid")]; tensor var_2917_strides_0 = const()[name = string("op_2917_strides_0"), val = tensor([1, 1])]; tensor var_2917_pad_0 = const()[name = string("op_2917_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2917_dilations_0 = const()[name = string("op_2917_dilations_0"), val = tensor([1, 1])]; int32 var_2917_groups_0 = const()[name = string("op_2917_groups_0"), val = int32(1)]; tensor var_2917 = conv(dilations = var_2917_dilations_0, groups = var_2917_groups_0, pad = var_2917_pad_0, pad_type = var_2917_pad_type_0, strides = var_2917_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_2901)[name = string("op_2917")]; tensor var_2922 = const()[name = string("op_2922"), val = tensor([1, 32, 1, 128])]; tensor var_2923 = reshape(shape = var_2922, x = var_2917)[name = string("op_2923")]; string var_2939_pad_type_0 = const()[name = string("op_2939_pad_type_0"), val = string("valid")]; tensor var_2939_strides_0 = const()[name = string("op_2939_strides_0"), val = tensor([1, 1])]; tensor var_2939_pad_0 = const()[name = string("op_2939_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2939_dilations_0 = const()[name = string("op_2939_dilations_0"), val = tensor([1, 1])]; int32 var_2939_groups_0 = const()[name = string("op_2939_groups_0"), val = int32(1)]; tensor var_2939 = conv(dilations = var_2939_dilations_0, groups = var_2939_groups_0, pad = var_2939_pad_0, pad_type = var_2939_pad_type_0, strides = var_2939_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_2901)[name = string("op_2939")]; tensor var_2944 = const()[name = string("op_2944"), val = tensor([1, 8, 1, 128])]; tensor var_2945 = reshape(shape = var_2944, x = var_2939)[name = string("op_2945")]; string var_2961_pad_type_0 = const()[name = string("op_2961_pad_type_0"), val = string("valid")]; tensor var_2961_strides_0 = const()[name = string("op_2961_strides_0"), val = tensor([1, 1])]; tensor var_2961_pad_0 = const()[name = string("op_2961_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2961_dilations_0 = const()[name = string("op_2961_dilations_0"), val = tensor([1, 1])]; int32 var_2961_groups_0 = const()[name = string("op_2961_groups_0"), val = int32(1)]; tensor var_2961 = conv(dilations = var_2961_dilations_0, groups = var_2961_groups_0, pad = var_2961_pad_0, pad_type = var_2961_pad_type_0, strides = var_2961_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_2901)[name = string("op_2961")]; tensor var_2966 = const()[name = string("op_2966"), val = tensor([1, 8, 1, 128])]; tensor var_2967 = reshape(shape = var_2966, x = var_2961)[name = string("op_2967")]; tensor mean_35_axes_0 = const()[name = string("mean_35_axes_0"), val = tensor([-1])]; bool mean_35_keep_dims_0 = const()[name = string("mean_35_keep_dims_0"), val = bool(true)]; tensor mean_35 = reduce_mean(axes = mean_35_axes_0, keep_dims = mean_35_keep_dims_0, x = var_2923)[name = string("mean_35")]; tensor input_77 = sub(x = var_2923, y = mean_35)[name = string("input_77")]; tensor var_2988_axes_0 = const()[name = string("op_2988_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615296)))]; fp16 var_2976_to_fp16 = const()[name = string("op_2976_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2988_cast_fp16 = layer_norm(axes = var_2988_axes_0, epsilon = var_2976_to_fp16, gamma = model_model_layers_22_self_attn_q_norm_weight_to_fp16, x = input_77)[name = string("op_2988_cast_fp16")]; tensor mean_37_axes_0 = const()[name = string("mean_37_axes_0"), val = tensor([-1])]; bool mean_37_keep_dims_0 = const()[name = string("mean_37_keep_dims_0"), val = bool(true)]; tensor mean_37 = reduce_mean(axes = mean_37_axes_0, keep_dims = mean_37_keep_dims_0, x = var_2945)[name = string("mean_37")]; tensor input_79 = sub(x = var_2945, y = mean_37)[name = string("input_79")]; tensor var_3006_axes_0 = const()[name = string("op_3006_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615616)))]; fp16 var_2994_to_fp16 = const()[name = string("op_2994_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3006_cast_fp16 = layer_norm(axes = var_3006_axes_0, epsilon = var_2994_to_fp16, gamma = model_model_layers_22_self_attn_k_norm_weight_to_fp16, x = input_79)[name = string("op_3006_cast_fp16")]; tensor var_3009 = mul(x = var_2988_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3009")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_2988_cast_fp16)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_2988_cast_fp16)[name = string("x2_17")]; fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)]; tensor var_3030 = mul(x = x2_17, y = const_77_promoted)[name = string("op_3030")]; int32 var_3032 = const()[name = string("op_3032"), val = int32(-1)]; bool var_3033_interleave_0 = const()[name = string("op_3033_interleave_0"), val = bool(false)]; tensor var_3033 = concat(axis = var_3032, interleave = var_3033_interleave_0, values = (var_3030, x1_17))[name = string("op_3033")]; tensor var_3034 = mul(x = var_3033, y = sin_1_cast_fp16)[name = string("op_3034")]; tensor query_states_17 = add(x = var_3009, y = var_3034)[name = string("query_states_17")]; tensor var_3037 = mul(x = var_3006_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3037")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_3006_cast_fp16)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_3006_cast_fp16)[name = string("x2_19")]; fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; tensor var_3058 = mul(x = x2_19, y = const_80_promoted)[name = string("op_3058")]; int32 var_3060 = const()[name = string("op_3060"), val = int32(-1)]; bool var_3061_interleave_0 = const()[name = string("op_3061_interleave_0"), val = bool(false)]; tensor var_3061 = concat(axis = var_3060, interleave = var_3061_interleave_0, values = (var_3058, x1_19))[name = string("op_3061")]; tensor var_3062 = mul(x = var_3061, y = sin_1_cast_fp16)[name = string("op_3062")]; tensor key_states_17 = add(x = var_3037, y = var_3062)[name = string("key_states_17")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([22])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([23])]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_1195, concat_35_values3_0))[name = string("concat_35")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = key_states_17, x = coreml_update_state_43)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; tensor coreml_update_state_44 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([58])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([59])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_1195, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_2967, x = coreml_update_state_44)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; tensor coreml_update_state_45 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; tensor var_3117_begin_0 = const()[name = string("op_3117_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_3117_end_0 = const()[name = string("op_3117_end_0"), val = tensor([23, 8, 1024, 128])]; tensor var_3117_end_mask_0 = const()[name = string("op_3117_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3117_cast_fp16 = slice_by_index(begin = var_3117_begin_0, end = var_3117_end_0, end_mask = var_3117_end_mask_0, x = coreml_update_state_45)[name = string("op_3117_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_3117_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_3124_begin_0 = const()[name = string("op_3124_begin_0"), val = tensor([58, 0, 0, 0])]; tensor var_3124_end_0 = const()[name = string("op_3124_end_0"), val = tensor([59, 8, 1024, 128])]; tensor var_3124_end_mask_0 = const()[name = string("op_3124_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3124_cast_fp16 = slice_by_index(begin = var_3124_begin_0, end = var_3124_end_0, end_mask = var_3124_end_mask_0, x = coreml_update_state_45)[name = string("op_3124_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_3124_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_87_axes_0 = const()[name = string("x_87_axes_0"), val = tensor([1])]; tensor x_87_cast_fp16 = expand_dims(axes = x_87_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_3161 = const()[name = string("op_3161"), val = tensor([1, 4, 1, 1])]; tensor x_89_cast_fp16 = tile(reps = var_3161, x = x_87_cast_fp16)[name = string("x_89_cast_fp16")]; tensor var_3173 = const()[name = string("op_3173"), val = tensor([1, -1, 1024, 128])]; tensor key_states_19_cast_fp16 = reshape(shape = var_3173, x = x_89_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor x_93_axes_0 = const()[name = string("x_93_axes_0"), val = tensor([1])]; tensor x_93_cast_fp16 = expand_dims(axes = x_93_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_93_cast_fp16")]; tensor var_3181 = const()[name = string("op_3181"), val = tensor([1, 4, 1, 1])]; tensor x_95_cast_fp16 = tile(reps = var_3181, x = x_93_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_3193 = const()[name = string("op_3193"), val = tensor([1, -1, 1024, 128])]; tensor value_states_27_cast_fp16 = reshape(shape = var_3193, x = x_95_cast_fp16)[name = string("value_states_27_cast_fp16")]; bool var_3208_transpose_x_1 = const()[name = string("op_3208_transpose_x_1"), val = bool(false)]; bool var_3208_transpose_y_1 = const()[name = string("op_3208_transpose_y_1"), val = bool(true)]; tensor var_3208 = matmul(transpose_x = var_3208_transpose_x_1, transpose_y = var_3208_transpose_y_1, x = query_states_17, y = key_states_19_cast_fp16)[name = string("op_3208")]; fp16 var_3209_to_fp16 = const()[name = string("op_3209_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_3208, y = var_3209_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_3244 = const()[name = string("op_3244"), val = int32(-1)]; tensor attn_weights_29_cast_fp16 = softmax(axis = var_3244, x = attn_weights_27_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = attn_weights_29_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_41_cast_fp16")]; tensor var_3255_perm_0 = const()[name = string("op_3255_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3259 = const()[name = string("op_3259"), val = tensor([1, 1, 4096])]; tensor var_3255_cast_fp16 = transpose(perm = var_3255_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_82")]; tensor attn_output_45_cast_fp16 = reshape(shape = var_3259, x = var_3255_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_3264 = const()[name = string("op_3264"), val = tensor([0, 2, 1])]; string var_3280_pad_type_0 = const()[name = string("op_3280_pad_type_0"), val = string("valid")]; int32 var_3280_groups_0 = const()[name = string("op_3280_groups_0"), val = int32(1)]; tensor var_3280_strides_0 = const()[name = string("op_3280_strides_0"), val = tensor([1])]; tensor var_3280_pad_0 = const()[name = string("op_3280_pad_0"), val = tensor([0, 0])]; tensor var_3280_dilations_0 = const()[name = string("op_3280_dilations_0"), val = tensor([1])]; tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875858880))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3265_cast_fp16 = transpose(perm = var_3264, x = attn_output_45_cast_fp16)[name = string("transpose_81")]; tensor var_3280_cast_fp16 = conv(dilations = var_3280_dilations_0, groups = var_3280_groups_0, pad = var_3280_pad_0, pad_type = var_3280_pad_type_0, strides = var_3280_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3265_cast_fp16)[name = string("op_3280_cast_fp16")]; tensor var_3284 = const()[name = string("op_3284"), val = tensor([0, 2, 1])]; tensor attn_output_49_cast_fp16 = transpose(perm = var_3284, x = var_3280_cast_fp16)[name = string("transpose_80")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_39_axes_0 = const()[name = string("mean_39_axes_0"), val = tensor([-1])]; bool mean_39_keep_dims_0 = const()[name = string("mean_39_keep_dims_0"), val = bool(true)]; tensor mean_39_cast_fp16 = reduce_mean(axes = mean_39_axes_0, keep_dims = mean_39_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_39_cast_fp16")]; tensor input_83_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_39_cast_fp16)[name = string("input_83_cast_fp16")]; tensor var_3303_axes_0 = const()[name = string("op_3303_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875869184)))]; fp16 var_3291_to_fp16 = const()[name = string("op_3291_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3303_cast_fp16 = layer_norm(axes = var_3303_axes_0, epsilon = var_3291_to_fp16, gamma = model_model_layers_22_post_attention_layernorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("op_3303_cast_fp16")]; tensor var_3317 = const()[name = string("op_3317"), val = tensor([0, 2, 1])]; tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; tensor var_3318 = transpose(perm = var_3317, x = var_3303_cast_fp16)[name = string("transpose_79")]; tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_3318)[name = string("input_85")]; string input_87_pad_type_0 = const()[name = string("input_87_pad_type_0"), val = string("valid")]; tensor input_87_strides_0 = const()[name = string("input_87_strides_0"), val = tensor([1, 1])]; tensor input_87_pad_0 = const()[name = string("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_87_dilations_0 = const()[name = string("input_87_dilations_0"), val = tensor([1, 1])]; int32 input_87_groups_0 = const()[name = string("input_87_groups_0"), val = int32(1)]; tensor input_87 = conv(dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_palettized, x = input_85)[name = string("input_87")]; string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_palettized, x = input_85)[name = string("b_9")]; tensor c_9 = silu(x = input_87)[name = string("c_9")]; tensor input_89 = mul(x = c_9, y = b_9)[name = string("input_89")]; string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; tensor e_9 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_palettized, x = input_89)[name = string("e_9")]; tensor var_3340_axes_0 = const()[name = string("op_3340_axes_0"), val = tensor([2])]; tensor var_3340 = squeeze(axes = var_3340_axes_0, x = e_9)[name = string("op_3340")]; tensor var_3341 = const()[name = string("op_3341"), val = tensor([0, 2, 1])]; tensor var_3342 = transpose(perm = var_3341, x = var_3340)[name = string("transpose_78")]; tensor hidden_states_31_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_3342)[name = string("hidden_states_31_cast_fp16")]; tensor mean_41_axes_0 = const()[name = string("mean_41_axes_0"), val = tensor([-1])]; bool mean_41_keep_dims_0 = const()[name = string("mean_41_keep_dims_0"), val = bool(true)]; tensor mean_41_cast_fp16 = reduce_mean(axes = mean_41_axes_0, keep_dims = mean_41_keep_dims_0, x = hidden_states_31_cast_fp16)[name = string("mean_41_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_31_cast_fp16, y = mean_41_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_3360_axes_0 = const()[name = string("op_3360_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875874368)))]; fp16 var_3348_to_fp16 = const()[name = string("op_3348_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3360_cast_fp16 = layer_norm(axes = var_3360_axes_0, epsilon = var_3348_to_fp16, gamma = model_model_layers_23_input_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_3360_cast_fp16")]; tensor var_3366 = const()[name = string("op_3366"), val = tensor([0, 2, 1])]; tensor var_3369_axes_0 = const()[name = string("op_3369_axes_0"), val = tensor([2])]; tensor var_3367 = transpose(perm = var_3366, x = var_3360_cast_fp16)[name = string("transpose_77")]; tensor var_3369 = expand_dims(axes = var_3369_axes_0, x = var_3367)[name = string("op_3369")]; string var_3385_pad_type_0 = const()[name = string("op_3385_pad_type_0"), val = string("valid")]; tensor var_3385_strides_0 = const()[name = string("op_3385_strides_0"), val = tensor([1, 1])]; tensor var_3385_pad_0 = const()[name = string("op_3385_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3385_dilations_0 = const()[name = string("op_3385_dilations_0"), val = tensor([1, 1])]; int32 var_3385_groups_0 = const()[name = string("op_3385_groups_0"), val = int32(1)]; tensor var_3385 = conv(dilations = var_3385_dilations_0, groups = var_3385_groups_0, pad = var_3385_pad_0, pad_type = var_3385_pad_type_0, strides = var_3385_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_3369)[name = string("op_3385")]; tensor var_3390 = const()[name = string("op_3390"), val = tensor([1, 32, 1, 128])]; tensor var_3391 = reshape(shape = var_3390, x = var_3385)[name = string("op_3391")]; string var_3407_pad_type_0 = const()[name = string("op_3407_pad_type_0"), val = string("valid")]; tensor var_3407_strides_0 = const()[name = string("op_3407_strides_0"), val = tensor([1, 1])]; tensor var_3407_pad_0 = const()[name = string("op_3407_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3407_dilations_0 = const()[name = string("op_3407_dilations_0"), val = tensor([1, 1])]; int32 var_3407_groups_0 = const()[name = string("op_3407_groups_0"), val = int32(1)]; tensor var_3407 = conv(dilations = var_3407_dilations_0, groups = var_3407_groups_0, pad = var_3407_pad_0, pad_type = var_3407_pad_type_0, strides = var_3407_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_3369)[name = string("op_3407")]; tensor var_3412 = const()[name = string("op_3412"), val = tensor([1, 8, 1, 128])]; tensor var_3413 = reshape(shape = var_3412, x = var_3407)[name = string("op_3413")]; string var_3429_pad_type_0 = const()[name = string("op_3429_pad_type_0"), val = string("valid")]; tensor var_3429_strides_0 = const()[name = string("op_3429_strides_0"), val = tensor([1, 1])]; tensor var_3429_pad_0 = const()[name = string("op_3429_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3429_dilations_0 = const()[name = string("op_3429_dilations_0"), val = tensor([1, 1])]; int32 var_3429_groups_0 = const()[name = string("op_3429_groups_0"), val = int32(1)]; tensor var_3429 = conv(dilations = var_3429_dilations_0, groups = var_3429_groups_0, pad = var_3429_pad_0, pad_type = var_3429_pad_type_0, strides = var_3429_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_3369)[name = string("op_3429")]; tensor var_3434 = const()[name = string("op_3434"), val = tensor([1, 8, 1, 128])]; tensor var_3435 = reshape(shape = var_3434, x = var_3429)[name = string("op_3435")]; tensor mean_43_axes_0 = const()[name = string("mean_43_axes_0"), val = tensor([-1])]; bool mean_43_keep_dims_0 = const()[name = string("mean_43_keep_dims_0"), val = bool(true)]; tensor mean_43 = reduce_mean(axes = mean_43_axes_0, keep_dims = mean_43_keep_dims_0, x = var_3391)[name = string("mean_43")]; tensor input_95 = sub(x = var_3391, y = mean_43)[name = string("input_95")]; tensor var_3456_axes_0 = const()[name = string("op_3456_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875879552)))]; fp16 var_3444_to_fp16 = const()[name = string("op_3444_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3456_cast_fp16 = layer_norm(axes = var_3456_axes_0, epsilon = var_3444_to_fp16, gamma = model_model_layers_23_self_attn_q_norm_weight_to_fp16, x = input_95)[name = string("op_3456_cast_fp16")]; tensor mean_45_axes_0 = const()[name = string("mean_45_axes_0"), val = tensor([-1])]; bool mean_45_keep_dims_0 = const()[name = string("mean_45_keep_dims_0"), val = bool(true)]; tensor mean_45 = reduce_mean(axes = mean_45_axes_0, keep_dims = mean_45_keep_dims_0, x = var_3413)[name = string("mean_45")]; tensor input_97 = sub(x = var_3413, y = mean_45)[name = string("input_97")]; tensor var_3474_axes_0 = const()[name = string("op_3474_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875879872)))]; fp16 var_3462_to_fp16 = const()[name = string("op_3462_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3474_cast_fp16 = layer_norm(axes = var_3474_axes_0, epsilon = var_3462_to_fp16, gamma = model_model_layers_23_self_attn_k_norm_weight_to_fp16, x = input_97)[name = string("op_3474_cast_fp16")]; tensor var_3477 = mul(x = var_3456_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3477")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_3456_cast_fp16)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_3456_cast_fp16)[name = string("x2_21")]; fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; tensor var_3498 = mul(x = x2_21, y = const_95_promoted)[name = string("op_3498")]; int32 var_3500 = const()[name = string("op_3500"), val = int32(-1)]; bool var_3501_interleave_0 = const()[name = string("op_3501_interleave_0"), val = bool(false)]; tensor var_3501 = concat(axis = var_3500, interleave = var_3501_interleave_0, values = (var_3498, x1_21))[name = string("op_3501")]; tensor var_3502 = mul(x = var_3501, y = sin_1_cast_fp16)[name = string("op_3502")]; tensor query_states_21 = add(x = var_3477, y = var_3502)[name = string("query_states_21")]; tensor var_3505 = mul(x = var_3474_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3505")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_3474_cast_fp16)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_3474_cast_fp16)[name = string("x2_23")]; fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; tensor var_3526 = mul(x = x2_23, y = const_98_promoted)[name = string("op_3526")]; int32 var_3528 = const()[name = string("op_3528"), val = int32(-1)]; bool var_3529_interleave_0 = const()[name = string("op_3529_interleave_0"), val = bool(false)]; tensor var_3529 = concat(axis = var_3528, interleave = var_3529_interleave_0, values = (var_3526, x1_23))[name = string("op_3529")]; tensor var_3530 = mul(x = var_3529, y = sin_1_cast_fp16)[name = string("op_3530")]; tensor key_states_21 = add(x = var_3505, y = var_3530)[name = string("key_states_21")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([23])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([24])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_1195, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = key_states_21, x = coreml_update_state_45)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; tensor coreml_update_state_46 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([59])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([60])]; int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_1195, concat_47_values3_0))[name = string("concat_47")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_3435, x = coreml_update_state_46)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; tensor coreml_update_state_47 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; tensor var_3585_begin_0 = const()[name = string("op_3585_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_3585_end_0 = const()[name = string("op_3585_end_0"), val = tensor([24, 8, 1024, 128])]; tensor var_3585_end_mask_0 = const()[name = string("op_3585_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3585_cast_fp16 = slice_by_index(begin = var_3585_begin_0, end = var_3585_end_0, end_mask = var_3585_end_mask_0, x = coreml_update_state_47)[name = string("op_3585_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_3585_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_3592_begin_0 = const()[name = string("op_3592_begin_0"), val = tensor([59, 0, 0, 0])]; tensor var_3592_end_0 = const()[name = string("op_3592_end_0"), val = tensor([60, 8, 1024, 128])]; tensor var_3592_end_mask_0 = const()[name = string("op_3592_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3592_cast_fp16 = slice_by_index(begin = var_3592_begin_0, end = var_3592_end_0, end_mask = var_3592_end_mask_0, x = coreml_update_state_47)[name = string("op_3592_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_3592_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_107_axes_0 = const()[name = string("x_107_axes_0"), val = tensor([1])]; tensor x_107_cast_fp16 = expand_dims(axes = x_107_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_107_cast_fp16")]; tensor var_3629 = const()[name = string("op_3629"), val = tensor([1, 4, 1, 1])]; tensor x_109_cast_fp16 = tile(reps = var_3629, x = x_107_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_3641 = const()[name = string("op_3641"), val = tensor([1, -1, 1024, 128])]; tensor key_states_23_cast_fp16 = reshape(shape = var_3641, x = x_109_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor x_113_axes_0 = const()[name = string("x_113_axes_0"), val = tensor([1])]; tensor x_113_cast_fp16 = expand_dims(axes = x_113_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_113_cast_fp16")]; tensor var_3649 = const()[name = string("op_3649"), val = tensor([1, 4, 1, 1])]; tensor x_115_cast_fp16 = tile(reps = var_3649, x = x_113_cast_fp16)[name = string("x_115_cast_fp16")]; tensor var_3661 = const()[name = string("op_3661"), val = tensor([1, -1, 1024, 128])]; tensor value_states_33_cast_fp16 = reshape(shape = var_3661, x = x_115_cast_fp16)[name = string("value_states_33_cast_fp16")]; bool var_3676_transpose_x_1 = const()[name = string("op_3676_transpose_x_1"), val = bool(false)]; bool var_3676_transpose_y_1 = const()[name = string("op_3676_transpose_y_1"), val = bool(true)]; tensor var_3676 = matmul(transpose_x = var_3676_transpose_x_1, transpose_y = var_3676_transpose_y_1, x = query_states_21, y = key_states_23_cast_fp16)[name = string("op_3676")]; fp16 var_3677_to_fp16 = const()[name = string("op_3677_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_31_cast_fp16 = mul(x = var_3676, y = var_3677_to_fp16)[name = string("attn_weights_31_cast_fp16")]; tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; int32 var_3712 = const()[name = string("op_3712"), val = int32(-1)]; tensor attn_weights_35_cast_fp16 = softmax(axis = var_3712, x = attn_weights_33_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; bool attn_output_51_transpose_x_0 = const()[name = string("attn_output_51_transpose_x_0"), val = bool(false)]; bool attn_output_51_transpose_y_0 = const()[name = string("attn_output_51_transpose_y_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = matmul(transpose_x = attn_output_51_transpose_x_0, transpose_y = attn_output_51_transpose_y_0, x = attn_weights_35_cast_fp16, y = value_states_33_cast_fp16)[name = string("attn_output_51_cast_fp16")]; tensor var_3723_perm_0 = const()[name = string("op_3723_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3727 = const()[name = string("op_3727"), val = tensor([1, 1, 4096])]; tensor var_3723_cast_fp16 = transpose(perm = var_3723_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_76")]; tensor attn_output_55_cast_fp16 = reshape(shape = var_3727, x = var_3723_cast_fp16)[name = string("attn_output_55_cast_fp16")]; tensor var_3732 = const()[name = string("op_3732"), val = tensor([0, 2, 1])]; string var_3748_pad_type_0 = const()[name = string("op_3748_pad_type_0"), val = string("valid")]; int32 var_3748_groups_0 = const()[name = string("op_3748_groups_0"), val = int32(1)]; tensor var_3748_strides_0 = const()[name = string("op_3748_strides_0"), val = tensor([1])]; tensor var_3748_pad_0 = const()[name = string("op_3748_pad_0"), val = tensor([0, 0])]; tensor var_3748_dilations_0 = const()[name = string("op_3748_dilations_0"), val = tensor([1])]; tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875880192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881123136))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3733_cast_fp16 = transpose(perm = var_3732, x = attn_output_55_cast_fp16)[name = string("transpose_75")]; tensor var_3748_cast_fp16 = conv(dilations = var_3748_dilations_0, groups = var_3748_groups_0, pad = var_3748_pad_0, pad_type = var_3748_pad_type_0, strides = var_3748_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_3733_cast_fp16)[name = string("op_3748_cast_fp16")]; tensor var_3752 = const()[name = string("op_3752"), val = tensor([0, 2, 1])]; tensor attn_output_59_cast_fp16 = transpose(perm = var_3752, x = var_3748_cast_fp16)[name = string("transpose_74")]; tensor hidden_states_35_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor mean_47_axes_0 = const()[name = string("mean_47_axes_0"), val = tensor([-1])]; bool mean_47_keep_dims_0 = const()[name = string("mean_47_keep_dims_0"), val = bool(true)]; tensor mean_47_cast_fp16 = reduce_mean(axes = mean_47_axes_0, keep_dims = mean_47_keep_dims_0, x = hidden_states_35_cast_fp16)[name = string("mean_47_cast_fp16")]; tensor input_101_cast_fp16 = sub(x = hidden_states_35_cast_fp16, y = mean_47_cast_fp16)[name = string("input_101_cast_fp16")]; tensor var_3771_axes_0 = const()[name = string("op_3771_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881133440)))]; fp16 var_3759_to_fp16 = const()[name = string("op_3759_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3771_cast_fp16 = layer_norm(axes = var_3771_axes_0, epsilon = var_3759_to_fp16, gamma = model_model_layers_23_post_attention_layernorm_weight_to_fp16, x = input_101_cast_fp16)[name = string("op_3771_cast_fp16")]; tensor var_3785 = const()[name = string("op_3785"), val = tensor([0, 2, 1])]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; tensor var_3786 = transpose(perm = var_3785, x = var_3771_cast_fp16)[name = string("transpose_73")]; tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_3786)[name = string("input_103")]; string input_105_pad_type_0 = const()[name = string("input_105_pad_type_0"), val = string("valid")]; tensor input_105_strides_0 = const()[name = string("input_105_strides_0"), val = tensor([1, 1])]; tensor input_105_pad_0 = const()[name = string("input_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_105_dilations_0 = const()[name = string("input_105_dilations_0"), val = tensor([1, 1])]; int32 input_105_groups_0 = const()[name = string("input_105_groups_0"), val = int32(1)]; tensor input_105 = conv(dilations = input_105_dilations_0, groups = input_105_groups_0, pad = input_105_pad_0, pad_type = input_105_pad_type_0, strides = input_105_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_palettized, x = input_103)[name = string("input_105")]; string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_palettized, x = input_103)[name = string("b_11")]; tensor c_11 = silu(x = input_105)[name = string("c_11")]; tensor input_107 = mul(x = c_11, y = b_11)[name = string("input_107")]; string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; tensor e_11 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_palettized, x = input_107)[name = string("e_11")]; tensor var_3808_axes_0 = const()[name = string("op_3808_axes_0"), val = tensor([2])]; tensor var_3808 = squeeze(axes = var_3808_axes_0, x = e_11)[name = string("op_3808")]; tensor var_3809 = const()[name = string("op_3809"), val = tensor([0, 2, 1])]; tensor var_3810 = transpose(perm = var_3809, x = var_3808)[name = string("transpose_72")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = var_3810)[name = string("hidden_states_37_cast_fp16")]; tensor mean_49_axes_0 = const()[name = string("mean_49_axes_0"), val = tensor([-1])]; bool mean_49_keep_dims_0 = const()[name = string("mean_49_keep_dims_0"), val = bool(true)]; tensor mean_49_cast_fp16 = reduce_mean(axes = mean_49_axes_0, keep_dims = mean_49_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_49_cast_fp16")]; tensor input_109_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_49_cast_fp16)[name = string("input_109_cast_fp16")]; tensor var_3828_axes_0 = const()[name = string("op_3828_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881138624)))]; fp16 var_3816_to_fp16 = const()[name = string("op_3816_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3828_cast_fp16 = layer_norm(axes = var_3828_axes_0, epsilon = var_3816_to_fp16, gamma = model_model_layers_24_input_layernorm_weight_to_fp16, x = input_109_cast_fp16)[name = string("op_3828_cast_fp16")]; tensor var_3834 = const()[name = string("op_3834"), val = tensor([0, 2, 1])]; tensor var_3837_axes_0 = const()[name = string("op_3837_axes_0"), val = tensor([2])]; tensor var_3835 = transpose(perm = var_3834, x = var_3828_cast_fp16)[name = string("transpose_71")]; tensor var_3837 = expand_dims(axes = var_3837_axes_0, x = var_3835)[name = string("op_3837")]; string var_3853_pad_type_0 = const()[name = string("op_3853_pad_type_0"), val = string("valid")]; tensor var_3853_strides_0 = const()[name = string("op_3853_strides_0"), val = tensor([1, 1])]; tensor var_3853_pad_0 = const()[name = string("op_3853_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3853_dilations_0 = const()[name = string("op_3853_dilations_0"), val = tensor([1, 1])]; int32 var_3853_groups_0 = const()[name = string("op_3853_groups_0"), val = int32(1)]; tensor var_3853 = conv(dilations = var_3853_dilations_0, groups = var_3853_groups_0, pad = var_3853_pad_0, pad_type = var_3853_pad_type_0, strides = var_3853_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_3837)[name = string("op_3853")]; tensor var_3858 = const()[name = string("op_3858"), val = tensor([1, 32, 1, 128])]; tensor var_3859 = reshape(shape = var_3858, x = var_3853)[name = string("op_3859")]; string var_3875_pad_type_0 = const()[name = string("op_3875_pad_type_0"), val = string("valid")]; tensor var_3875_strides_0 = const()[name = string("op_3875_strides_0"), val = tensor([1, 1])]; tensor var_3875_pad_0 = const()[name = string("op_3875_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3875_dilations_0 = const()[name = string("op_3875_dilations_0"), val = tensor([1, 1])]; int32 var_3875_groups_0 = const()[name = string("op_3875_groups_0"), val = int32(1)]; tensor var_3875 = conv(dilations = var_3875_dilations_0, groups = var_3875_groups_0, pad = var_3875_pad_0, pad_type = var_3875_pad_type_0, strides = var_3875_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_3837)[name = string("op_3875")]; tensor var_3880 = const()[name = string("op_3880"), val = tensor([1, 8, 1, 128])]; tensor var_3881 = reshape(shape = var_3880, x = var_3875)[name = string("op_3881")]; string var_3897_pad_type_0 = const()[name = string("op_3897_pad_type_0"), val = string("valid")]; tensor var_3897_strides_0 = const()[name = string("op_3897_strides_0"), val = tensor([1, 1])]; tensor var_3897_pad_0 = const()[name = string("op_3897_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3897_dilations_0 = const()[name = string("op_3897_dilations_0"), val = tensor([1, 1])]; int32 var_3897_groups_0 = const()[name = string("op_3897_groups_0"), val = int32(1)]; tensor var_3897 = conv(dilations = var_3897_dilations_0, groups = var_3897_groups_0, pad = var_3897_pad_0, pad_type = var_3897_pad_type_0, strides = var_3897_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_3837)[name = string("op_3897")]; tensor var_3902 = const()[name = string("op_3902"), val = tensor([1, 8, 1, 128])]; tensor var_3903 = reshape(shape = var_3902, x = var_3897)[name = string("op_3903")]; tensor mean_51_axes_0 = const()[name = string("mean_51_axes_0"), val = tensor([-1])]; bool mean_51_keep_dims_0 = const()[name = string("mean_51_keep_dims_0"), val = bool(true)]; tensor mean_51 = reduce_mean(axes = mean_51_axes_0, keep_dims = mean_51_keep_dims_0, x = var_3859)[name = string("mean_51")]; tensor input_113 = sub(x = var_3859, y = mean_51)[name = string("input_113")]; tensor var_3924_axes_0 = const()[name = string("op_3924_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881143808)))]; fp16 var_3912_to_fp16 = const()[name = string("op_3912_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3924_cast_fp16 = layer_norm(axes = var_3924_axes_0, epsilon = var_3912_to_fp16, gamma = model_model_layers_24_self_attn_q_norm_weight_to_fp16, x = input_113)[name = string("op_3924_cast_fp16")]; tensor mean_53_axes_0 = const()[name = string("mean_53_axes_0"), val = tensor([-1])]; bool mean_53_keep_dims_0 = const()[name = string("mean_53_keep_dims_0"), val = bool(true)]; tensor mean_53 = reduce_mean(axes = mean_53_axes_0, keep_dims = mean_53_keep_dims_0, x = var_3881)[name = string("mean_53")]; tensor input_115 = sub(x = var_3881, y = mean_53)[name = string("input_115")]; tensor var_3942_axes_0 = const()[name = string("op_3942_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881144128)))]; fp16 var_3930_to_fp16 = const()[name = string("op_3930_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3942_cast_fp16 = layer_norm(axes = var_3942_axes_0, epsilon = var_3930_to_fp16, gamma = model_model_layers_24_self_attn_k_norm_weight_to_fp16, x = input_115)[name = string("op_3942_cast_fp16")]; tensor var_3945 = mul(x = var_3924_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3945")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_3924_cast_fp16)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_3924_cast_fp16)[name = string("x2_25")]; fp16 const_113_promoted = const()[name = string("const_113_promoted"), val = fp16(-0x1p+0)]; tensor var_3966 = mul(x = x2_25, y = const_113_promoted)[name = string("op_3966")]; int32 var_3968 = const()[name = string("op_3968"), val = int32(-1)]; bool var_3969_interleave_0 = const()[name = string("op_3969_interleave_0"), val = bool(false)]; tensor var_3969 = concat(axis = var_3968, interleave = var_3969_interleave_0, values = (var_3966, x1_25))[name = string("op_3969")]; tensor var_3970 = mul(x = var_3969, y = sin_1_cast_fp16)[name = string("op_3970")]; tensor query_states_25 = add(x = var_3945, y = var_3970)[name = string("query_states_25")]; tensor var_3973 = mul(x = var_3942_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3973")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_3942_cast_fp16)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_3942_cast_fp16)[name = string("x2_27")]; fp16 const_116_promoted = const()[name = string("const_116_promoted"), val = fp16(-0x1p+0)]; tensor var_3994 = mul(x = x2_27, y = const_116_promoted)[name = string("op_3994")]; int32 var_3996 = const()[name = string("op_3996"), val = int32(-1)]; bool var_3997_interleave_0 = const()[name = string("op_3997_interleave_0"), val = bool(false)]; tensor var_3997 = concat(axis = var_3996, interleave = var_3997_interleave_0, values = (var_3994, x1_27))[name = string("op_3997")]; tensor var_3998 = mul(x = var_3997, y = sin_1_cast_fp16)[name = string("op_3998")]; tensor key_states_25 = add(x = var_3973, y = var_3998)[name = string("key_states_25")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([24])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([25])]; int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_1195, concat_51_values3_0))[name = string("concat_51")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = key_states_25, x = coreml_update_state_47)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_48 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([60])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([61])]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_1195, concat_55_values3_0))[name = string("concat_55")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_3903, x = coreml_update_state_48)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_49 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; tensor var_4053_begin_0 = const()[name = string("op_4053_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_4053_end_0 = const()[name = string("op_4053_end_0"), val = tensor([25, 8, 1024, 128])]; tensor var_4053_end_mask_0 = const()[name = string("op_4053_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4053_cast_fp16 = slice_by_index(begin = var_4053_begin_0, end = var_4053_end_0, end_mask = var_4053_end_mask_0, x = coreml_update_state_49)[name = string("op_4053_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_4053_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_4060_begin_0 = const()[name = string("op_4060_begin_0"), val = tensor([60, 0, 0, 0])]; tensor var_4060_end_0 = const()[name = string("op_4060_end_0"), val = tensor([61, 8, 1024, 128])]; tensor var_4060_end_mask_0 = const()[name = string("op_4060_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4060_cast_fp16 = slice_by_index(begin = var_4060_begin_0, end = var_4060_end_0, end_mask = var_4060_end_mask_0, x = coreml_update_state_49)[name = string("op_4060_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_4060_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_127_axes_0 = const()[name = string("x_127_axes_0"), val = tensor([1])]; tensor x_127_cast_fp16 = expand_dims(axes = x_127_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_4097 = const()[name = string("op_4097"), val = tensor([1, 4, 1, 1])]; tensor x_129_cast_fp16 = tile(reps = var_4097, x = x_127_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_4109 = const()[name = string("op_4109"), val = tensor([1, -1, 1024, 128])]; tensor key_states_27_cast_fp16 = reshape(shape = var_4109, x = x_129_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_133_axes_0 = const()[name = string("x_133_axes_0"), val = tensor([1])]; tensor x_133_cast_fp16 = expand_dims(axes = x_133_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_4117 = const()[name = string("op_4117"), val = tensor([1, 4, 1, 1])]; tensor x_135_cast_fp16 = tile(reps = var_4117, x = x_133_cast_fp16)[name = string("x_135_cast_fp16")]; tensor var_4129 = const()[name = string("op_4129"), val = tensor([1, -1, 1024, 128])]; tensor value_states_39_cast_fp16 = reshape(shape = var_4129, x = x_135_cast_fp16)[name = string("value_states_39_cast_fp16")]; bool var_4144_transpose_x_1 = const()[name = string("op_4144_transpose_x_1"), val = bool(false)]; bool var_4144_transpose_y_1 = const()[name = string("op_4144_transpose_y_1"), val = bool(true)]; tensor var_4144 = matmul(transpose_x = var_4144_transpose_x_1, transpose_y = var_4144_transpose_y_1, x = query_states_25, y = key_states_27_cast_fp16)[name = string("op_4144")]; fp16 var_4145_to_fp16 = const()[name = string("op_4145_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_4144, y = var_4145_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_4180 = const()[name = string("op_4180"), val = int32(-1)]; tensor attn_weights_41_cast_fp16 = softmax(axis = var_4180, x = attn_weights_39_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_41_cast_fp16, y = value_states_39_cast_fp16)[name = string("attn_output_61_cast_fp16")]; tensor var_4191_perm_0 = const()[name = string("op_4191_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4195 = const()[name = string("op_4195"), val = tensor([1, 1, 4096])]; tensor var_4191_cast_fp16 = transpose(perm = var_4191_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_70")]; tensor attn_output_65_cast_fp16 = reshape(shape = var_4195, x = var_4191_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_4200 = const()[name = string("op_4200"), val = tensor([0, 2, 1])]; string var_4216_pad_type_0 = const()[name = string("op_4216_pad_type_0"), val = string("valid")]; int32 var_4216_groups_0 = const()[name = string("op_4216_groups_0"), val = int32(1)]; tensor var_4216_strides_0 = const()[name = string("op_4216_strides_0"), val = tensor([1])]; tensor var_4216_pad_0 = const()[name = string("op_4216_pad_0"), val = tensor([0, 0])]; tensor var_4216_dilations_0 = const()[name = string("op_4216_dilations_0"), val = tensor([1])]; tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881144448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886387392))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4201_cast_fp16 = transpose(perm = var_4200, x = attn_output_65_cast_fp16)[name = string("transpose_69")]; tensor var_4216_cast_fp16 = conv(dilations = var_4216_dilations_0, groups = var_4216_groups_0, pad = var_4216_pad_0, pad_type = var_4216_pad_type_0, strides = var_4216_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4201_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor var_4220 = const()[name = string("op_4220"), val = tensor([0, 2, 1])]; tensor attn_output_69_cast_fp16 = transpose(perm = var_4220, x = var_4216_cast_fp16)[name = string("transpose_68")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor mean_55_axes_0 = const()[name = string("mean_55_axes_0"), val = tensor([-1])]; bool mean_55_keep_dims_0 = const()[name = string("mean_55_keep_dims_0"), val = bool(true)]; tensor mean_55_cast_fp16 = reduce_mean(axes = mean_55_axes_0, keep_dims = mean_55_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_55_cast_fp16")]; tensor input_119_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_55_cast_fp16)[name = string("input_119_cast_fp16")]; tensor var_4239_axes_0 = const()[name = string("op_4239_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886397696)))]; fp16 var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4239_cast_fp16 = layer_norm(axes = var_4239_axes_0, epsilon = var_4227_to_fp16, gamma = model_model_layers_24_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_4239_cast_fp16")]; tensor var_4253 = const()[name = string("op_4253"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_4254 = transpose(perm = var_4253, x = var_4239_cast_fp16)[name = string("transpose_67")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_4254)[name = string("input_121")]; string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_palettized, x = input_121)[name = string("b_13")]; tensor c_13 = silu(x = input_123)[name = string("c_13")]; tensor input_125 = mul(x = c_13, y = b_13)[name = string("input_125")]; string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; tensor e_13 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_palettized, x = input_125)[name = string("e_13")]; tensor var_4276_axes_0 = const()[name = string("op_4276_axes_0"), val = tensor([2])]; tensor var_4276 = squeeze(axes = var_4276_axes_0, x = e_13)[name = string("op_4276")]; tensor var_4277 = const()[name = string("op_4277"), val = tensor([0, 2, 1])]; tensor var_4278 = transpose(perm = var_4277, x = var_4276)[name = string("transpose_66")]; tensor hidden_states_43_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = var_4278)[name = string("hidden_states_43_cast_fp16")]; tensor mean_57_axes_0 = const()[name = string("mean_57_axes_0"), val = tensor([-1])]; bool mean_57_keep_dims_0 = const()[name = string("mean_57_keep_dims_0"), val = bool(true)]; tensor mean_57_cast_fp16 = reduce_mean(axes = mean_57_axes_0, keep_dims = mean_57_keep_dims_0, x = hidden_states_43_cast_fp16)[name = string("mean_57_cast_fp16")]; tensor input_127_cast_fp16 = sub(x = hidden_states_43_cast_fp16, y = mean_57_cast_fp16)[name = string("input_127_cast_fp16")]; tensor var_4296_axes_0 = const()[name = string("op_4296_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886402880)))]; fp16 var_4284_to_fp16 = const()[name = string("op_4284_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4296_cast_fp16 = layer_norm(axes = var_4296_axes_0, epsilon = var_4284_to_fp16, gamma = model_model_layers_25_input_layernorm_weight_to_fp16, x = input_127_cast_fp16)[name = string("op_4296_cast_fp16")]; tensor var_4302 = const()[name = string("op_4302"), val = tensor([0, 2, 1])]; tensor var_4305_axes_0 = const()[name = string("op_4305_axes_0"), val = tensor([2])]; tensor var_4303 = transpose(perm = var_4302, x = var_4296_cast_fp16)[name = string("transpose_65")]; tensor var_4305 = expand_dims(axes = var_4305_axes_0, x = var_4303)[name = string("op_4305")]; string var_4321_pad_type_0 = const()[name = string("op_4321_pad_type_0"), val = string("valid")]; tensor var_4321_strides_0 = const()[name = string("op_4321_strides_0"), val = tensor([1, 1])]; tensor var_4321_pad_0 = const()[name = string("op_4321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4321_dilations_0 = const()[name = string("op_4321_dilations_0"), val = tensor([1, 1])]; int32 var_4321_groups_0 = const()[name = string("op_4321_groups_0"), val = int32(1)]; tensor var_4321 = conv(dilations = var_4321_dilations_0, groups = var_4321_groups_0, pad = var_4321_pad_0, pad_type = var_4321_pad_type_0, strides = var_4321_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_4305)[name = string("op_4321")]; tensor var_4326 = const()[name = string("op_4326"), val = tensor([1, 32, 1, 128])]; tensor var_4327 = reshape(shape = var_4326, x = var_4321)[name = string("op_4327")]; string var_4343_pad_type_0 = const()[name = string("op_4343_pad_type_0"), val = string("valid")]; tensor var_4343_strides_0 = const()[name = string("op_4343_strides_0"), val = tensor([1, 1])]; tensor var_4343_pad_0 = const()[name = string("op_4343_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4343_dilations_0 = const()[name = string("op_4343_dilations_0"), val = tensor([1, 1])]; int32 var_4343_groups_0 = const()[name = string("op_4343_groups_0"), val = int32(1)]; tensor var_4343 = conv(dilations = var_4343_dilations_0, groups = var_4343_groups_0, pad = var_4343_pad_0, pad_type = var_4343_pad_type_0, strides = var_4343_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_4305)[name = string("op_4343")]; tensor var_4348 = const()[name = string("op_4348"), val = tensor([1, 8, 1, 128])]; tensor var_4349 = reshape(shape = var_4348, x = var_4343)[name = string("op_4349")]; string var_4365_pad_type_0 = const()[name = string("op_4365_pad_type_0"), val = string("valid")]; tensor var_4365_strides_0 = const()[name = string("op_4365_strides_0"), val = tensor([1, 1])]; tensor var_4365_pad_0 = const()[name = string("op_4365_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4365_dilations_0 = const()[name = string("op_4365_dilations_0"), val = tensor([1, 1])]; int32 var_4365_groups_0 = const()[name = string("op_4365_groups_0"), val = int32(1)]; tensor var_4365 = conv(dilations = var_4365_dilations_0, groups = var_4365_groups_0, pad = var_4365_pad_0, pad_type = var_4365_pad_type_0, strides = var_4365_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_4305)[name = string("op_4365")]; tensor var_4370 = const()[name = string("op_4370"), val = tensor([1, 8, 1, 128])]; tensor var_4371 = reshape(shape = var_4370, x = var_4365)[name = string("op_4371")]; tensor mean_59_axes_0 = const()[name = string("mean_59_axes_0"), val = tensor([-1])]; bool mean_59_keep_dims_0 = const()[name = string("mean_59_keep_dims_0"), val = bool(true)]; tensor mean_59 = reduce_mean(axes = mean_59_axes_0, keep_dims = mean_59_keep_dims_0, x = var_4327)[name = string("mean_59")]; tensor input_131 = sub(x = var_4327, y = mean_59)[name = string("input_131")]; tensor var_4392_axes_0 = const()[name = string("op_4392_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408064)))]; fp16 var_4380_to_fp16 = const()[name = string("op_4380_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4392_cast_fp16 = layer_norm(axes = var_4392_axes_0, epsilon = var_4380_to_fp16, gamma = model_model_layers_25_self_attn_q_norm_weight_to_fp16, x = input_131)[name = string("op_4392_cast_fp16")]; tensor mean_61_axes_0 = const()[name = string("mean_61_axes_0"), val = tensor([-1])]; bool mean_61_keep_dims_0 = const()[name = string("mean_61_keep_dims_0"), val = bool(true)]; tensor mean_61 = reduce_mean(axes = mean_61_axes_0, keep_dims = mean_61_keep_dims_0, x = var_4349)[name = string("mean_61")]; tensor input_133 = sub(x = var_4349, y = mean_61)[name = string("input_133")]; tensor var_4410_axes_0 = const()[name = string("op_4410_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408384)))]; fp16 var_4398_to_fp16 = const()[name = string("op_4398_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4410_cast_fp16 = layer_norm(axes = var_4410_axes_0, epsilon = var_4398_to_fp16, gamma = model_model_layers_25_self_attn_k_norm_weight_to_fp16, x = input_133)[name = string("op_4410_cast_fp16")]; tensor var_4413 = mul(x = var_4392_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4413")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_4392_cast_fp16)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_4392_cast_fp16)[name = string("x2_29")]; fp16 const_131_promoted = const()[name = string("const_131_promoted"), val = fp16(-0x1p+0)]; tensor var_4434 = mul(x = x2_29, y = const_131_promoted)[name = string("op_4434")]; int32 var_4436 = const()[name = string("op_4436"), val = int32(-1)]; bool var_4437_interleave_0 = const()[name = string("op_4437_interleave_0"), val = bool(false)]; tensor var_4437 = concat(axis = var_4436, interleave = var_4437_interleave_0, values = (var_4434, x1_29))[name = string("op_4437")]; tensor var_4438 = mul(x = var_4437, y = sin_1_cast_fp16)[name = string("op_4438")]; tensor query_states_29 = add(x = var_4413, y = var_4438)[name = string("query_states_29")]; tensor var_4441 = mul(x = var_4410_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4441")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_4410_cast_fp16)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_4410_cast_fp16)[name = string("x2_31")]; fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)]; tensor var_4462 = mul(x = x2_31, y = const_134_promoted)[name = string("op_4462")]; int32 var_4464 = const()[name = string("op_4464"), val = int32(-1)]; bool var_4465_interleave_0 = const()[name = string("op_4465_interleave_0"), val = bool(false)]; tensor var_4465 = concat(axis = var_4464, interleave = var_4465_interleave_0, values = (var_4462, x1_31))[name = string("op_4465")]; tensor var_4466 = mul(x = var_4465, y = sin_1_cast_fp16)[name = string("op_4466")]; tensor key_states_29 = add(x = var_4441, y = var_4466)[name = string("key_states_29")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([25])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([26])]; int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_1195, concat_59_values3_0))[name = string("concat_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = key_states_29, x = coreml_update_state_49)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_50 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([61])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([62])]; int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_1195, concat_63_values3_0))[name = string("concat_63")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_4371, x = coreml_update_state_50)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_51 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; tensor var_4521_begin_0 = const()[name = string("op_4521_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_4521_end_0 = const()[name = string("op_4521_end_0"), val = tensor([26, 8, 1024, 128])]; tensor var_4521_end_mask_0 = const()[name = string("op_4521_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4521_cast_fp16 = slice_by_index(begin = var_4521_begin_0, end = var_4521_end_0, end_mask = var_4521_end_mask_0, x = coreml_update_state_51)[name = string("op_4521_cast_fp16")]; tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_4521_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; tensor var_4528_begin_0 = const()[name = string("op_4528_begin_0"), val = tensor([61, 0, 0, 0])]; tensor var_4528_end_0 = const()[name = string("op_4528_end_0"), val = tensor([62, 8, 1024, 128])]; tensor var_4528_end_mask_0 = const()[name = string("op_4528_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4528_cast_fp16 = slice_by_index(begin = var_4528_begin_0, end = var_4528_end_0, end_mask = var_4528_end_mask_0, x = coreml_update_state_51)[name = string("op_4528_cast_fp16")]; tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_4528_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; tensor x_147_axes_0 = const()[name = string("x_147_axes_0"), val = tensor([1])]; tensor x_147_cast_fp16 = expand_dims(axes = x_147_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_147_cast_fp16")]; tensor var_4565 = const()[name = string("op_4565"), val = tensor([1, 4, 1, 1])]; tensor x_149_cast_fp16 = tile(reps = var_4565, x = x_147_cast_fp16)[name = string("x_149_cast_fp16")]; tensor var_4577 = const()[name = string("op_4577"), val = tensor([1, -1, 1024, 128])]; tensor key_states_31_cast_fp16 = reshape(shape = var_4577, x = x_149_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor x_153_axes_0 = const()[name = string("x_153_axes_0"), val = tensor([1])]; tensor x_153_cast_fp16 = expand_dims(axes = x_153_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_4585 = const()[name = string("op_4585"), val = tensor([1, 4, 1, 1])]; tensor x_155_cast_fp16 = tile(reps = var_4585, x = x_153_cast_fp16)[name = string("x_155_cast_fp16")]; tensor var_4597 = const()[name = string("op_4597"), val = tensor([1, -1, 1024, 128])]; tensor value_states_45_cast_fp16 = reshape(shape = var_4597, x = x_155_cast_fp16)[name = string("value_states_45_cast_fp16")]; bool var_4612_transpose_x_1 = const()[name = string("op_4612_transpose_x_1"), val = bool(false)]; bool var_4612_transpose_y_1 = const()[name = string("op_4612_transpose_y_1"), val = bool(true)]; tensor var_4612 = matmul(transpose_x = var_4612_transpose_x_1, transpose_y = var_4612_transpose_y_1, x = query_states_29, y = key_states_31_cast_fp16)[name = string("op_4612")]; fp16 var_4613_to_fp16 = const()[name = string("op_4613_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_43_cast_fp16 = mul(x = var_4612, y = var_4613_to_fp16)[name = string("attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = causal_mask)[name = string("attn_weights_45_cast_fp16")]; int32 var_4648 = const()[name = string("op_4648"), val = int32(-1)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_4648, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_71_transpose_x_0 = const()[name = string("attn_output_71_transpose_x_0"), val = bool(false)]; bool attn_output_71_transpose_y_0 = const()[name = string("attn_output_71_transpose_y_0"), val = bool(false)]; tensor attn_output_71_cast_fp16 = matmul(transpose_x = attn_output_71_transpose_x_0, transpose_y = attn_output_71_transpose_y_0, x = attn_weights_47_cast_fp16, y = value_states_45_cast_fp16)[name = string("attn_output_71_cast_fp16")]; tensor var_4659_perm_0 = const()[name = string("op_4659_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4663 = const()[name = string("op_4663"), val = tensor([1, 1, 4096])]; tensor var_4659_cast_fp16 = transpose(perm = var_4659_perm_0, x = attn_output_71_cast_fp16)[name = string("transpose_64")]; tensor attn_output_75_cast_fp16 = reshape(shape = var_4663, x = var_4659_cast_fp16)[name = string("attn_output_75_cast_fp16")]; tensor var_4668 = const()[name = string("op_4668"), val = tensor([0, 2, 1])]; string var_4684_pad_type_0 = const()[name = string("op_4684_pad_type_0"), val = string("valid")]; int32 var_4684_groups_0 = const()[name = string("op_4684_groups_0"), val = int32(1)]; tensor var_4684_strides_0 = const()[name = string("op_4684_strides_0"), val = tensor([1])]; tensor var_4684_pad_0 = const()[name = string("op_4684_pad_0"), val = tensor([0, 0])]; tensor var_4684_dilations_0 = const()[name = string("op_4684_dilations_0"), val = tensor([1])]; tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891651648))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4669_cast_fp16 = transpose(perm = var_4668, x = attn_output_75_cast_fp16)[name = string("transpose_63")]; tensor var_4684_cast_fp16 = conv(dilations = var_4684_dilations_0, groups = var_4684_groups_0, pad = var_4684_pad_0, pad_type = var_4684_pad_type_0, strides = var_4684_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_4669_cast_fp16)[name = string("op_4684_cast_fp16")]; tensor var_4688 = const()[name = string("op_4688"), val = tensor([0, 2, 1])]; tensor attn_output_79_cast_fp16 = transpose(perm = var_4688, x = var_4684_cast_fp16)[name = string("transpose_62")]; tensor hidden_states_47_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor mean_63_axes_0 = const()[name = string("mean_63_axes_0"), val = tensor([-1])]; bool mean_63_keep_dims_0 = const()[name = string("mean_63_keep_dims_0"), val = bool(true)]; tensor mean_63_cast_fp16 = reduce_mean(axes = mean_63_axes_0, keep_dims = mean_63_keep_dims_0, x = hidden_states_47_cast_fp16)[name = string("mean_63_cast_fp16")]; tensor input_137_cast_fp16 = sub(x = hidden_states_47_cast_fp16, y = mean_63_cast_fp16)[name = string("input_137_cast_fp16")]; tensor var_4707_axes_0 = const()[name = string("op_4707_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891661952)))]; fp16 var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4707_cast_fp16 = layer_norm(axes = var_4707_axes_0, epsilon = var_4695_to_fp16, gamma = model_model_layers_25_post_attention_layernorm_weight_to_fp16, x = input_137_cast_fp16)[name = string("op_4707_cast_fp16")]; tensor var_4721 = const()[name = string("op_4721"), val = tensor([0, 2, 1])]; tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; tensor var_4722 = transpose(perm = var_4721, x = var_4707_cast_fp16)[name = string("transpose_61")]; tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_4722)[name = string("input_139")]; string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; tensor input_141 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_palettized, x = input_139)[name = string("input_141")]; string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_palettized, x = input_139)[name = string("b_15")]; tensor c_15 = silu(x = input_141)[name = string("c_15")]; tensor input_143 = mul(x = c_15, y = b_15)[name = string("input_143")]; string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; tensor e_15 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_palettized, x = input_143)[name = string("e_15")]; tensor var_4744_axes_0 = const()[name = string("op_4744_axes_0"), val = tensor([2])]; tensor var_4744 = squeeze(axes = var_4744_axes_0, x = e_15)[name = string("op_4744")]; tensor var_4745 = const()[name = string("op_4745"), val = tensor([0, 2, 1])]; tensor var_4746 = transpose(perm = var_4745, x = var_4744)[name = string("transpose_60")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = var_4746)[name = string("hidden_states_49_cast_fp16")]; tensor mean_65_axes_0 = const()[name = string("mean_65_axes_0"), val = tensor([-1])]; bool mean_65_keep_dims_0 = const()[name = string("mean_65_keep_dims_0"), val = bool(true)]; tensor mean_65_cast_fp16 = reduce_mean(axes = mean_65_axes_0, keep_dims = mean_65_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_65_cast_fp16")]; tensor input_145_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_65_cast_fp16)[name = string("input_145_cast_fp16")]; tensor var_4764_axes_0 = const()[name = string("op_4764_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891667136)))]; fp16 var_4752_to_fp16 = const()[name = string("op_4752_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4764_cast_fp16 = layer_norm(axes = var_4764_axes_0, epsilon = var_4752_to_fp16, gamma = model_model_layers_26_input_layernorm_weight_to_fp16, x = input_145_cast_fp16)[name = string("op_4764_cast_fp16")]; tensor var_4770 = const()[name = string("op_4770"), val = tensor([0, 2, 1])]; tensor var_4773_axes_0 = const()[name = string("op_4773_axes_0"), val = tensor([2])]; tensor var_4771 = transpose(perm = var_4770, x = var_4764_cast_fp16)[name = string("transpose_59")]; tensor var_4773 = expand_dims(axes = var_4773_axes_0, x = var_4771)[name = string("op_4773")]; string var_4789_pad_type_0 = const()[name = string("op_4789_pad_type_0"), val = string("valid")]; tensor var_4789_strides_0 = const()[name = string("op_4789_strides_0"), val = tensor([1, 1])]; tensor var_4789_pad_0 = const()[name = string("op_4789_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4789_dilations_0 = const()[name = string("op_4789_dilations_0"), val = tensor([1, 1])]; int32 var_4789_groups_0 = const()[name = string("op_4789_groups_0"), val = int32(1)]; tensor var_4789 = conv(dilations = var_4789_dilations_0, groups = var_4789_groups_0, pad = var_4789_pad_0, pad_type = var_4789_pad_type_0, strides = var_4789_strides_0, weight = model_model_layers_26_self_attn_q_proj_weight_palettized, x = var_4773)[name = string("op_4789")]; tensor var_4794 = const()[name = string("op_4794"), val = tensor([1, 32, 1, 128])]; tensor var_4795 = reshape(shape = var_4794, x = var_4789)[name = string("op_4795")]; string var_4811_pad_type_0 = const()[name = string("op_4811_pad_type_0"), val = string("valid")]; tensor var_4811_strides_0 = const()[name = string("op_4811_strides_0"), val = tensor([1, 1])]; tensor var_4811_pad_0 = const()[name = string("op_4811_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4811_dilations_0 = const()[name = string("op_4811_dilations_0"), val = tensor([1, 1])]; int32 var_4811_groups_0 = const()[name = string("op_4811_groups_0"), val = int32(1)]; tensor var_4811 = conv(dilations = var_4811_dilations_0, groups = var_4811_groups_0, pad = var_4811_pad_0, pad_type = var_4811_pad_type_0, strides = var_4811_strides_0, weight = model_model_layers_26_self_attn_k_proj_weight_palettized, x = var_4773)[name = string("op_4811")]; tensor var_4816 = const()[name = string("op_4816"), val = tensor([1, 8, 1, 128])]; tensor var_4817 = reshape(shape = var_4816, x = var_4811)[name = string("op_4817")]; string var_4833_pad_type_0 = const()[name = string("op_4833_pad_type_0"), val = string("valid")]; tensor var_4833_strides_0 = const()[name = string("op_4833_strides_0"), val = tensor([1, 1])]; tensor var_4833_pad_0 = const()[name = string("op_4833_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4833_dilations_0 = const()[name = string("op_4833_dilations_0"), val = tensor([1, 1])]; int32 var_4833_groups_0 = const()[name = string("op_4833_groups_0"), val = int32(1)]; tensor var_4833 = conv(dilations = var_4833_dilations_0, groups = var_4833_groups_0, pad = var_4833_pad_0, pad_type = var_4833_pad_type_0, strides = var_4833_strides_0, weight = model_model_layers_26_self_attn_v_proj_weight_palettized, x = var_4773)[name = string("op_4833")]; tensor var_4838 = const()[name = string("op_4838"), val = tensor([1, 8, 1, 128])]; tensor var_4839 = reshape(shape = var_4838, x = var_4833)[name = string("op_4839")]; tensor mean_67_axes_0 = const()[name = string("mean_67_axes_0"), val = tensor([-1])]; bool mean_67_keep_dims_0 = const()[name = string("mean_67_keep_dims_0"), val = bool(true)]; tensor mean_67 = reduce_mean(axes = mean_67_axes_0, keep_dims = mean_67_keep_dims_0, x = var_4795)[name = string("mean_67")]; tensor input_149 = sub(x = var_4795, y = mean_67)[name = string("input_149")]; tensor var_4860_axes_0 = const()[name = string("op_4860_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672320)))]; fp16 var_4848_to_fp16 = const()[name = string("op_4848_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4860_cast_fp16 = layer_norm(axes = var_4860_axes_0, epsilon = var_4848_to_fp16, gamma = model_model_layers_26_self_attn_q_norm_weight_to_fp16, x = input_149)[name = string("op_4860_cast_fp16")]; tensor mean_69_axes_0 = const()[name = string("mean_69_axes_0"), val = tensor([-1])]; bool mean_69_keep_dims_0 = const()[name = string("mean_69_keep_dims_0"), val = bool(true)]; tensor mean_69 = reduce_mean(axes = mean_69_axes_0, keep_dims = mean_69_keep_dims_0, x = var_4817)[name = string("mean_69")]; tensor input_151 = sub(x = var_4817, y = mean_69)[name = string("input_151")]; tensor var_4878_axes_0 = const()[name = string("op_4878_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672640)))]; fp16 var_4866_to_fp16 = const()[name = string("op_4866_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4878_cast_fp16 = layer_norm(axes = var_4878_axes_0, epsilon = var_4866_to_fp16, gamma = model_model_layers_26_self_attn_k_norm_weight_to_fp16, x = input_151)[name = string("op_4878_cast_fp16")]; tensor var_4881 = mul(x = var_4860_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4881")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_4860_cast_fp16)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_4860_cast_fp16)[name = string("x2_33")]; fp16 const_149_promoted = const()[name = string("const_149_promoted"), val = fp16(-0x1p+0)]; tensor var_4902 = mul(x = x2_33, y = const_149_promoted)[name = string("op_4902")]; int32 var_4904 = const()[name = string("op_4904"), val = int32(-1)]; bool var_4905_interleave_0 = const()[name = string("op_4905_interleave_0"), val = bool(false)]; tensor var_4905 = concat(axis = var_4904, interleave = var_4905_interleave_0, values = (var_4902, x1_33))[name = string("op_4905")]; tensor var_4906 = mul(x = var_4905, y = sin_1_cast_fp16)[name = string("op_4906")]; tensor query_states_33 = add(x = var_4881, y = var_4906)[name = string("query_states_33")]; tensor var_4909 = mul(x = var_4878_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4909")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_4878_cast_fp16)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_4878_cast_fp16)[name = string("x2_35")]; fp16 const_152_promoted = const()[name = string("const_152_promoted"), val = fp16(-0x1p+0)]; tensor var_4930 = mul(x = x2_35, y = const_152_promoted)[name = string("op_4930")]; int32 var_4932 = const()[name = string("op_4932"), val = int32(-1)]; bool var_4933_interleave_0 = const()[name = string("op_4933_interleave_0"), val = bool(false)]; tensor var_4933 = concat(axis = var_4932, interleave = var_4933_interleave_0, values = (var_4930, x1_35))[name = string("op_4933")]; tensor var_4934 = mul(x = var_4933, y = sin_1_cast_fp16)[name = string("op_4934")]; tensor key_states_33 = add(x = var_4909, y = var_4934)[name = string("key_states_33")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([26])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([27])]; int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_1195, concat_67_values3_0))[name = string("concat_67")]; tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = key_states_33, x = coreml_update_state_51)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([62])]; tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([63])]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_1195, concat_71_values3_0))[name = string("concat_71")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = var_4839, x = coreml_update_state_52)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; tensor var_4989_begin_0 = const()[name = string("op_4989_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_4989_end_0 = const()[name = string("op_4989_end_0"), val = tensor([27, 8, 1024, 128])]; tensor var_4989_end_mask_0 = const()[name = string("op_4989_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4989_cast_fp16 = slice_by_index(begin = var_4989_begin_0, end = var_4989_end_0, end_mask = var_4989_end_mask_0, x = coreml_update_state_53)[name = string("op_4989_cast_fp16")]; tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_4989_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; tensor var_4996_begin_0 = const()[name = string("op_4996_begin_0"), val = tensor([62, 0, 0, 0])]; tensor var_4996_end_0 = const()[name = string("op_4996_end_0"), val = tensor([63, 8, 1024, 128])]; tensor var_4996_end_mask_0 = const()[name = string("op_4996_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4996_cast_fp16 = slice_by_index(begin = var_4996_begin_0, end = var_4996_end_0, end_mask = var_4996_end_mask_0, x = coreml_update_state_53)[name = string("op_4996_cast_fp16")]; tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_4996_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; tensor x_167_axes_0 = const()[name = string("x_167_axes_0"), val = tensor([1])]; tensor x_167_cast_fp16 = expand_dims(axes = x_167_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_167_cast_fp16")]; tensor var_5033 = const()[name = string("op_5033"), val = tensor([1, 4, 1, 1])]; tensor x_169_cast_fp16 = tile(reps = var_5033, x = x_167_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_5045 = const()[name = string("op_5045"), val = tensor([1, -1, 1024, 128])]; tensor key_states_35_cast_fp16 = reshape(shape = var_5045, x = x_169_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor x_173_axes_0 = const()[name = string("x_173_axes_0"), val = tensor([1])]; tensor x_173_cast_fp16 = expand_dims(axes = x_173_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_173_cast_fp16")]; tensor var_5053 = const()[name = string("op_5053"), val = tensor([1, 4, 1, 1])]; tensor x_175_cast_fp16 = tile(reps = var_5053, x = x_173_cast_fp16)[name = string("x_175_cast_fp16")]; tensor var_5065 = const()[name = string("op_5065"), val = tensor([1, -1, 1024, 128])]; tensor value_states_51_cast_fp16 = reshape(shape = var_5065, x = x_175_cast_fp16)[name = string("value_states_51_cast_fp16")]; bool var_5080_transpose_x_1 = const()[name = string("op_5080_transpose_x_1"), val = bool(false)]; bool var_5080_transpose_y_1 = const()[name = string("op_5080_transpose_y_1"), val = bool(true)]; tensor var_5080 = matmul(transpose_x = var_5080_transpose_x_1, transpose_y = var_5080_transpose_y_1, x = query_states_33, y = key_states_35_cast_fp16)[name = string("op_5080")]; fp16 var_5081_to_fp16 = const()[name = string("op_5081_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_5080, y = var_5081_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_5116 = const()[name = string("op_5116"), val = int32(-1)]; tensor attn_weights_53_cast_fp16 = softmax(axis = var_5116, x = attn_weights_51_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = attn_weights_53_cast_fp16, y = value_states_51_cast_fp16)[name = string("attn_output_81_cast_fp16")]; tensor var_5127_perm_0 = const()[name = string("op_5127_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5131 = const()[name = string("op_5131"), val = tensor([1, 1, 4096])]; tensor var_5127_cast_fp16 = transpose(perm = var_5127_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_58")]; tensor attn_output_85_cast_fp16 = reshape(shape = var_5131, x = var_5127_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_5136 = const()[name = string("op_5136"), val = tensor([0, 2, 1])]; string var_5152_pad_type_0 = const()[name = string("op_5152_pad_type_0"), val = string("valid")]; int32 var_5152_groups_0 = const()[name = string("op_5152_groups_0"), val = int32(1)]; tensor var_5152_strides_0 = const()[name = string("op_5152_strides_0"), val = tensor([1])]; tensor var_5152_pad_0 = const()[name = string("op_5152_pad_0"), val = tensor([0, 0])]; tensor var_5152_dilations_0 = const()[name = string("op_5152_dilations_0"), val = tensor([1])]; tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896915904))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5137_cast_fp16 = transpose(perm = var_5136, x = attn_output_85_cast_fp16)[name = string("transpose_57")]; tensor var_5152_cast_fp16 = conv(dilations = var_5152_dilations_0, groups = var_5152_groups_0, pad = var_5152_pad_0, pad_type = var_5152_pad_type_0, strides = var_5152_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5137_cast_fp16)[name = string("op_5152_cast_fp16")]; tensor var_5156 = const()[name = string("op_5156"), val = tensor([0, 2, 1])]; tensor attn_output_89_cast_fp16 = transpose(perm = var_5156, x = var_5152_cast_fp16)[name = string("transpose_56")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_71_axes_0 = const()[name = string("mean_71_axes_0"), val = tensor([-1])]; bool mean_71_keep_dims_0 = const()[name = string("mean_71_keep_dims_0"), val = bool(true)]; tensor mean_71_cast_fp16 = reduce_mean(axes = mean_71_axes_0, keep_dims = mean_71_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_71_cast_fp16")]; tensor input_155_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_71_cast_fp16)[name = string("input_155_cast_fp16")]; tensor var_5175_axes_0 = const()[name = string("op_5175_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896926208)))]; fp16 var_5163_to_fp16 = const()[name = string("op_5163_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5175_cast_fp16 = layer_norm(axes = var_5175_axes_0, epsilon = var_5163_to_fp16, gamma = model_model_layers_26_post_attention_layernorm_weight_to_fp16, x = input_155_cast_fp16)[name = string("op_5175_cast_fp16")]; tensor var_5189 = const()[name = string("op_5189"), val = tensor([0, 2, 1])]; tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; tensor var_5190 = transpose(perm = var_5189, x = var_5175_cast_fp16)[name = string("transpose_55")]; tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_5190)[name = string("input_157")]; string input_159_pad_type_0 = const()[name = string("input_159_pad_type_0"), val = string("valid")]; tensor input_159_strides_0 = const()[name = string("input_159_strides_0"), val = tensor([1, 1])]; tensor input_159_pad_0 = const()[name = string("input_159_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_159_dilations_0 = const()[name = string("input_159_dilations_0"), val = tensor([1, 1])]; int32 input_159_groups_0 = const()[name = string("input_159_groups_0"), val = int32(1)]; tensor input_159 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = model_model_layers_26_mlp_gate_proj_weight_palettized, x = input_157)[name = string("input_159")]; string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_26_mlp_up_proj_weight_palettized, x = input_157)[name = string("b_17")]; tensor c_17 = silu(x = input_159)[name = string("c_17")]; tensor input_161 = mul(x = c_17, y = b_17)[name = string("input_161")]; string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; tensor e_17 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_26_mlp_down_proj_weight_palettized, x = input_161)[name = string("e_17")]; tensor var_5212_axes_0 = const()[name = string("op_5212_axes_0"), val = tensor([2])]; tensor var_5212 = squeeze(axes = var_5212_axes_0, x = e_17)[name = string("op_5212")]; tensor var_5213 = const()[name = string("op_5213"), val = tensor([0, 2, 1])]; tensor var_5214 = transpose(perm = var_5213, x = var_5212)[name = string("transpose_54")]; tensor hidden_states_55_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_5214)[name = string("hidden_states_55_cast_fp16")]; tensor mean_73_axes_0 = const()[name = string("mean_73_axes_0"), val = tensor([-1])]; bool mean_73_keep_dims_0 = const()[name = string("mean_73_keep_dims_0"), val = bool(true)]; tensor mean_73_cast_fp16 = reduce_mean(axes = mean_73_axes_0, keep_dims = mean_73_keep_dims_0, x = hidden_states_55_cast_fp16)[name = string("mean_73_cast_fp16")]; tensor input_163_cast_fp16 = sub(x = hidden_states_55_cast_fp16, y = mean_73_cast_fp16)[name = string("input_163_cast_fp16")]; tensor var_5232_axes_0 = const()[name = string("op_5232_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896931392)))]; fp16 var_5220_to_fp16 = const()[name = string("op_5220_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5232_cast_fp16 = layer_norm(axes = var_5232_axes_0, epsilon = var_5220_to_fp16, gamma = model_model_layers_27_input_layernorm_weight_to_fp16, x = input_163_cast_fp16)[name = string("op_5232_cast_fp16")]; tensor var_5238 = const()[name = string("op_5238"), val = tensor([0, 2, 1])]; tensor var_5241_axes_0 = const()[name = string("op_5241_axes_0"), val = tensor([2])]; tensor var_5239 = transpose(perm = var_5238, x = var_5232_cast_fp16)[name = string("transpose_53")]; tensor var_5241 = expand_dims(axes = var_5241_axes_0, x = var_5239)[name = string("op_5241")]; string var_5257_pad_type_0 = const()[name = string("op_5257_pad_type_0"), val = string("valid")]; tensor var_5257_strides_0 = const()[name = string("op_5257_strides_0"), val = tensor([1, 1])]; tensor var_5257_pad_0 = const()[name = string("op_5257_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5257_dilations_0 = const()[name = string("op_5257_dilations_0"), val = tensor([1, 1])]; int32 var_5257_groups_0 = const()[name = string("op_5257_groups_0"), val = int32(1)]; tensor var_5257 = conv(dilations = var_5257_dilations_0, groups = var_5257_groups_0, pad = var_5257_pad_0, pad_type = var_5257_pad_type_0, strides = var_5257_strides_0, weight = model_model_layers_27_self_attn_q_proj_weight_palettized, x = var_5241)[name = string("op_5257")]; tensor var_5262 = const()[name = string("op_5262"), val = tensor([1, 32, 1, 128])]; tensor var_5263 = reshape(shape = var_5262, x = var_5257)[name = string("op_5263")]; string var_5279_pad_type_0 = const()[name = string("op_5279_pad_type_0"), val = string("valid")]; tensor var_5279_strides_0 = const()[name = string("op_5279_strides_0"), val = tensor([1, 1])]; tensor var_5279_pad_0 = const()[name = string("op_5279_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5279_dilations_0 = const()[name = string("op_5279_dilations_0"), val = tensor([1, 1])]; int32 var_5279_groups_0 = const()[name = string("op_5279_groups_0"), val = int32(1)]; tensor var_5279 = conv(dilations = var_5279_dilations_0, groups = var_5279_groups_0, pad = var_5279_pad_0, pad_type = var_5279_pad_type_0, strides = var_5279_strides_0, weight = model_model_layers_27_self_attn_k_proj_weight_palettized, x = var_5241)[name = string("op_5279")]; tensor var_5284 = const()[name = string("op_5284"), val = tensor([1, 8, 1, 128])]; tensor var_5285 = reshape(shape = var_5284, x = var_5279)[name = string("op_5285")]; string var_5301_pad_type_0 = const()[name = string("op_5301_pad_type_0"), val = string("valid")]; tensor var_5301_strides_0 = const()[name = string("op_5301_strides_0"), val = tensor([1, 1])]; tensor var_5301_pad_0 = const()[name = string("op_5301_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5301_dilations_0 = const()[name = string("op_5301_dilations_0"), val = tensor([1, 1])]; int32 var_5301_groups_0 = const()[name = string("op_5301_groups_0"), val = int32(1)]; tensor var_5301 = conv(dilations = var_5301_dilations_0, groups = var_5301_groups_0, pad = var_5301_pad_0, pad_type = var_5301_pad_type_0, strides = var_5301_strides_0, weight = model_model_layers_27_self_attn_v_proj_weight_palettized, x = var_5241)[name = string("op_5301")]; tensor var_5306 = const()[name = string("op_5306"), val = tensor([1, 8, 1, 128])]; tensor var_5307 = reshape(shape = var_5306, x = var_5301)[name = string("op_5307")]; tensor mean_75_axes_0 = const()[name = string("mean_75_axes_0"), val = tensor([-1])]; bool mean_75_keep_dims_0 = const()[name = string("mean_75_keep_dims_0"), val = bool(true)]; tensor mean_75 = reduce_mean(axes = mean_75_axes_0, keep_dims = mean_75_keep_dims_0, x = var_5263)[name = string("mean_75")]; tensor input_167 = sub(x = var_5263, y = mean_75)[name = string("input_167")]; tensor var_5328_axes_0 = const()[name = string("op_5328_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896936576)))]; fp16 var_5316_to_fp16 = const()[name = string("op_5316_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5328_cast_fp16 = layer_norm(axes = var_5328_axes_0, epsilon = var_5316_to_fp16, gamma = model_model_layers_27_self_attn_q_norm_weight_to_fp16, x = input_167)[name = string("op_5328_cast_fp16")]; tensor mean_77_axes_0 = const()[name = string("mean_77_axes_0"), val = tensor([-1])]; bool mean_77_keep_dims_0 = const()[name = string("mean_77_keep_dims_0"), val = bool(true)]; tensor mean_77 = reduce_mean(axes = mean_77_axes_0, keep_dims = mean_77_keep_dims_0, x = var_5285)[name = string("mean_77")]; tensor input_169 = sub(x = var_5285, y = mean_77)[name = string("input_169")]; tensor var_5346_axes_0 = const()[name = string("op_5346_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896936896)))]; fp16 var_5334_to_fp16 = const()[name = string("op_5334_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5346_cast_fp16 = layer_norm(axes = var_5346_axes_0, epsilon = var_5334_to_fp16, gamma = model_model_layers_27_self_attn_k_norm_weight_to_fp16, x = input_169)[name = string("op_5346_cast_fp16")]; tensor var_5349 = mul(x = var_5328_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5349")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_5328_cast_fp16)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_5328_cast_fp16)[name = string("x2_37")]; fp16 const_167_promoted = const()[name = string("const_167_promoted"), val = fp16(-0x1p+0)]; tensor var_5370 = mul(x = x2_37, y = const_167_promoted)[name = string("op_5370")]; int32 var_5372 = const()[name = string("op_5372"), val = int32(-1)]; bool var_5373_interleave_0 = const()[name = string("op_5373_interleave_0"), val = bool(false)]; tensor var_5373 = concat(axis = var_5372, interleave = var_5373_interleave_0, values = (var_5370, x1_37))[name = string("op_5373")]; tensor var_5374 = mul(x = var_5373, y = sin_1_cast_fp16)[name = string("op_5374")]; tensor query_states_37 = add(x = var_5349, y = var_5374)[name = string("query_states_37")]; tensor var_5377 = mul(x = var_5346_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5377")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_5346_cast_fp16)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_5346_cast_fp16)[name = string("x2_39")]; fp16 const_170_promoted = const()[name = string("const_170_promoted"), val = fp16(-0x1p+0)]; tensor var_5398 = mul(x = x2_39, y = const_170_promoted)[name = string("op_5398")]; int32 var_5400 = const()[name = string("op_5400"), val = int32(-1)]; bool var_5401_interleave_0 = const()[name = string("op_5401_interleave_0"), val = bool(false)]; tensor var_5401 = concat(axis = var_5400, interleave = var_5401_interleave_0, values = (var_5398, x1_39))[name = string("op_5401")]; tensor var_5402 = mul(x = var_5401, y = sin_1_cast_fp16)[name = string("op_5402")]; tensor key_states_37 = add(x = var_5377, y = var_5402)[name = string("key_states_37")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([27])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([28])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_112, concat_75_values1_0, var_1195, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = key_states_37, x = coreml_update_state_53)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([63])]; tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([64])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_118, concat_79_values1_0, var_1195, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = var_5307, x = coreml_update_state_54)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; tensor var_5457_begin_0 = const()[name = string("op_5457_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_5457_end_0 = const()[name = string("op_5457_end_0"), val = tensor([28, 8, 1024, 128])]; tensor var_5457_end_mask_0 = const()[name = string("op_5457_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5457_cast_fp16 = slice_by_index(begin = var_5457_begin_0, end = var_5457_end_0, end_mask = var_5457_end_mask_0, x = coreml_update_state_55)[name = string("op_5457_cast_fp16")]; tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_5457_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; tensor var_5464_begin_0 = const()[name = string("op_5464_begin_0"), val = tensor([63, 0, 0, 0])]; tensor var_5464_end_0 = const()[name = string("op_5464_end_0"), val = tensor([64, 8, 1024, 128])]; tensor var_5464_end_mask_0 = const()[name = string("op_5464_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5464_cast_fp16 = slice_by_index(begin = var_5464_begin_0, end = var_5464_end_0, end_mask = var_5464_end_mask_0, x = coreml_update_state_55)[name = string("op_5464_cast_fp16")]; tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_5464_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; tensor x_187_axes_0 = const()[name = string("x_187_axes_0"), val = tensor([1])]; tensor x_187_cast_fp16 = expand_dims(axes = x_187_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_5501 = const()[name = string("op_5501"), val = tensor([1, 4, 1, 1])]; tensor x_189_cast_fp16 = tile(reps = var_5501, x = x_187_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_5513 = const()[name = string("op_5513"), val = tensor([1, -1, 1024, 128])]; tensor key_states_39_cast_fp16 = reshape(shape = var_5513, x = x_189_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor x_193_axes_0 = const()[name = string("x_193_axes_0"), val = tensor([1])]; tensor x_193_cast_fp16 = expand_dims(axes = x_193_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_5521 = const()[name = string("op_5521"), val = tensor([1, 4, 1, 1])]; tensor x_195_cast_fp16 = tile(reps = var_5521, x = x_193_cast_fp16)[name = string("x_195_cast_fp16")]; tensor var_5533 = const()[name = string("op_5533"), val = tensor([1, -1, 1024, 128])]; tensor value_states_57_cast_fp16 = reshape(shape = var_5533, x = x_195_cast_fp16)[name = string("value_states_57_cast_fp16")]; bool var_5548_transpose_x_1 = const()[name = string("op_5548_transpose_x_1"), val = bool(false)]; bool var_5548_transpose_y_1 = const()[name = string("op_5548_transpose_y_1"), val = bool(true)]; tensor var_5548 = matmul(transpose_x = var_5548_transpose_x_1, transpose_y = var_5548_transpose_y_1, x = query_states_37, y = key_states_39_cast_fp16)[name = string("op_5548")]; fp16 var_5549_to_fp16 = const()[name = string("op_5549_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_55_cast_fp16 = mul(x = var_5548, y = var_5549_to_fp16)[name = string("attn_weights_55_cast_fp16")]; tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = causal_mask)[name = string("attn_weights_57_cast_fp16")]; int32 var_5584 = const()[name = string("op_5584"), val = int32(-1)]; tensor attn_weights_59_cast_fp16 = softmax(axis = var_5584, x = attn_weights_57_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_59_cast_fp16, y = value_states_57_cast_fp16)[name = string("attn_output_91_cast_fp16")]; tensor var_5595_perm_0 = const()[name = string("op_5595_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5599 = const()[name = string("op_5599"), val = tensor([1, 1, 4096])]; tensor var_5595_cast_fp16 = transpose(perm = var_5595_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_52")]; tensor attn_output_95_cast_fp16 = reshape(shape = var_5599, x = var_5595_cast_fp16)[name = string("attn_output_95_cast_fp16")]; tensor var_5604 = const()[name = string("op_5604"), val = tensor([0, 2, 1])]; string var_5620_pad_type_0 = const()[name = string("op_5620_pad_type_0"), val = string("valid")]; int32 var_5620_groups_0 = const()[name = string("op_5620_groups_0"), val = int32(1)]; tensor var_5620_strides_0 = const()[name = string("op_5620_strides_0"), val = tensor([1])]; tensor var_5620_pad_0 = const()[name = string("op_5620_pad_0"), val = tensor([0, 0])]; tensor var_5620_dilations_0 = const()[name = string("op_5620_dilations_0"), val = tensor([1])]; tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896937216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902180160))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5605_cast_fp16 = transpose(perm = var_5604, x = attn_output_95_cast_fp16)[name = string("transpose_51")]; tensor var_5620_cast_fp16 = conv(dilations = var_5620_dilations_0, groups = var_5620_groups_0, pad = var_5620_pad_0, pad_type = var_5620_pad_type_0, strides = var_5620_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_5605_cast_fp16)[name = string("op_5620_cast_fp16")]; tensor var_5624 = const()[name = string("op_5624"), val = tensor([0, 2, 1])]; tensor attn_output_99_cast_fp16 = transpose(perm = var_5624, x = var_5620_cast_fp16)[name = string("transpose_50")]; tensor hidden_states_59_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor mean_79_axes_0 = const()[name = string("mean_79_axes_0"), val = tensor([-1])]; bool mean_79_keep_dims_0 = const()[name = string("mean_79_keep_dims_0"), val = bool(true)]; tensor mean_79_cast_fp16 = reduce_mean(axes = mean_79_axes_0, keep_dims = mean_79_keep_dims_0, x = hidden_states_59_cast_fp16)[name = string("mean_79_cast_fp16")]; tensor input_173_cast_fp16 = sub(x = hidden_states_59_cast_fp16, y = mean_79_cast_fp16)[name = string("input_173_cast_fp16")]; tensor var_5643_axes_0 = const()[name = string("op_5643_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902190464)))]; fp16 var_5631_to_fp16 = const()[name = string("op_5631_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5643_cast_fp16 = layer_norm(axes = var_5643_axes_0, epsilon = var_5631_to_fp16, gamma = model_model_layers_27_post_attention_layernorm_weight_to_fp16, x = input_173_cast_fp16)[name = string("op_5643_cast_fp16")]; tensor var_5657 = const()[name = string("op_5657"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_5658 = transpose(perm = var_5657, x = var_5643_cast_fp16)[name = string("transpose_49")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_5658)[name = string("input_175")]; string input_177_pad_type_0 = const()[name = string("input_177_pad_type_0"), val = string("valid")]; tensor input_177_strides_0 = const()[name = string("input_177_strides_0"), val = tensor([1, 1])]; tensor input_177_pad_0 = const()[name = string("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_177_dilations_0 = const()[name = string("input_177_dilations_0"), val = tensor([1, 1])]; int32 input_177_groups_0 = const()[name = string("input_177_groups_0"), val = int32(1)]; tensor input_177 = conv(dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = model_model_layers_27_mlp_gate_proj_weight_palettized, x = input_175)[name = string("input_177")]; string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_27_mlp_up_proj_weight_palettized, x = input_175)[name = string("b_19")]; tensor c_19 = silu(x = input_177)[name = string("c_19")]; tensor input_179 = mul(x = c_19, y = b_19)[name = string("input_179")]; string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; tensor e_19 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_27_mlp_down_proj_weight_palettized, x = input_179)[name = string("e_19")]; tensor var_5680_axes_0 = const()[name = string("op_5680_axes_0"), val = tensor([2])]; tensor var_5680 = squeeze(axes = var_5680_axes_0, x = e_19)[name = string("op_5680")]; tensor var_5681 = const()[name = string("op_5681"), val = tensor([0, 2, 1])]; tensor var_5682 = transpose(perm = var_5681, x = var_5680)[name = string("transpose_48")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = var_5682)[name = string("hidden_states_61_cast_fp16")]; tensor mean_81_axes_0 = const()[name = string("mean_81_axes_0"), val = tensor([-1])]; bool mean_81_keep_dims_0 = const()[name = string("mean_81_keep_dims_0"), val = bool(true)]; tensor mean_81_cast_fp16 = reduce_mean(axes = mean_81_axes_0, keep_dims = mean_81_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_81_cast_fp16")]; tensor input_181_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_81_cast_fp16)[name = string("input_181_cast_fp16")]; tensor var_5700_axes_0 = const()[name = string("op_5700_axes_0"), val = tensor([-1])]; tensor model_model_layers_28_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_28_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902195648)))]; fp16 var_5688_to_fp16 = const()[name = string("op_5688_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5700_cast_fp16 = layer_norm(axes = var_5700_axes_0, epsilon = var_5688_to_fp16, gamma = model_model_layers_28_input_layernorm_weight_to_fp16, x = input_181_cast_fp16)[name = string("op_5700_cast_fp16")]; tensor var_5706 = const()[name = string("op_5706"), val = tensor([0, 2, 1])]; tensor var_5709_axes_0 = const()[name = string("op_5709_axes_0"), val = tensor([2])]; tensor var_5707 = transpose(perm = var_5706, x = var_5700_cast_fp16)[name = string("transpose_47")]; tensor var_5709 = expand_dims(axes = var_5709_axes_0, x = var_5707)[name = string("op_5709")]; string var_5725_pad_type_0 = const()[name = string("op_5725_pad_type_0"), val = string("valid")]; tensor var_5725_strides_0 = const()[name = string("op_5725_strides_0"), val = tensor([1, 1])]; tensor var_5725_pad_0 = const()[name = string("op_5725_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5725_dilations_0 = const()[name = string("op_5725_dilations_0"), val = tensor([1, 1])]; int32 var_5725_groups_0 = const()[name = string("op_5725_groups_0"), val = int32(1)]; tensor var_5725 = conv(dilations = var_5725_dilations_0, groups = var_5725_groups_0, pad = var_5725_pad_0, pad_type = var_5725_pad_type_0, strides = var_5725_strides_0, weight = model_model_layers_28_self_attn_q_proj_weight_palettized, x = var_5709)[name = string("op_5725")]; tensor var_5730 = const()[name = string("op_5730"), val = tensor([1, 32, 1, 128])]; tensor var_5731 = reshape(shape = var_5730, x = var_5725)[name = string("op_5731")]; string var_5747_pad_type_0 = const()[name = string("op_5747_pad_type_0"), val = string("valid")]; tensor var_5747_strides_0 = const()[name = string("op_5747_strides_0"), val = tensor([1, 1])]; tensor var_5747_pad_0 = const()[name = string("op_5747_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5747_dilations_0 = const()[name = string("op_5747_dilations_0"), val = tensor([1, 1])]; int32 var_5747_groups_0 = const()[name = string("op_5747_groups_0"), val = int32(1)]; tensor var_5747 = conv(dilations = var_5747_dilations_0, groups = var_5747_groups_0, pad = var_5747_pad_0, pad_type = var_5747_pad_type_0, strides = var_5747_strides_0, weight = model_model_layers_28_self_attn_k_proj_weight_palettized, x = var_5709)[name = string("op_5747")]; tensor var_5752 = const()[name = string("op_5752"), val = tensor([1, 8, 1, 128])]; tensor var_5753 = reshape(shape = var_5752, x = var_5747)[name = string("op_5753")]; string var_5769_pad_type_0 = const()[name = string("op_5769_pad_type_0"), val = string("valid")]; tensor var_5769_strides_0 = const()[name = string("op_5769_strides_0"), val = tensor([1, 1])]; tensor var_5769_pad_0 = const()[name = string("op_5769_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5769_dilations_0 = const()[name = string("op_5769_dilations_0"), val = tensor([1, 1])]; int32 var_5769_groups_0 = const()[name = string("op_5769_groups_0"), val = int32(1)]; tensor var_5769 = conv(dilations = var_5769_dilations_0, groups = var_5769_groups_0, pad = var_5769_pad_0, pad_type = var_5769_pad_type_0, strides = var_5769_strides_0, weight = model_model_layers_28_self_attn_v_proj_weight_palettized, x = var_5709)[name = string("op_5769")]; tensor var_5774 = const()[name = string("op_5774"), val = tensor([1, 8, 1, 128])]; tensor var_5775 = reshape(shape = var_5774, x = var_5769)[name = string("op_5775")]; tensor mean_83_axes_0 = const()[name = string("mean_83_axes_0"), val = tensor([-1])]; bool mean_83_keep_dims_0 = const()[name = string("mean_83_keep_dims_0"), val = bool(true)]; tensor mean_83 = reduce_mean(axes = mean_83_axes_0, keep_dims = mean_83_keep_dims_0, x = var_5731)[name = string("mean_83")]; tensor input_185 = sub(x = var_5731, y = mean_83)[name = string("input_185")]; tensor var_5796_axes_0 = const()[name = string("op_5796_axes_0"), val = tensor([-1])]; tensor model_model_layers_28_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_28_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902200832)))]; fp16 var_5784_to_fp16 = const()[name = string("op_5784_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5796_cast_fp16 = layer_norm(axes = var_5796_axes_0, epsilon = var_5784_to_fp16, gamma = model_model_layers_28_self_attn_q_norm_weight_to_fp16, x = input_185)[name = string("op_5796_cast_fp16")]; tensor mean_85_axes_0 = const()[name = string("mean_85_axes_0"), val = tensor([-1])]; bool mean_85_keep_dims_0 = const()[name = string("mean_85_keep_dims_0"), val = bool(true)]; tensor mean_85 = reduce_mean(axes = mean_85_axes_0, keep_dims = mean_85_keep_dims_0, x = var_5753)[name = string("mean_85")]; tensor input_187 = sub(x = var_5753, y = mean_85)[name = string("input_187")]; tensor var_5814_axes_0 = const()[name = string("op_5814_axes_0"), val = tensor([-1])]; tensor model_model_layers_28_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_28_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902201152)))]; fp16 var_5802_to_fp16 = const()[name = string("op_5802_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5814_cast_fp16 = layer_norm(axes = var_5814_axes_0, epsilon = var_5802_to_fp16, gamma = model_model_layers_28_self_attn_k_norm_weight_to_fp16, x = input_187)[name = string("op_5814_cast_fp16")]; tensor var_5817 = mul(x = var_5796_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5817")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_5796_cast_fp16)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_5796_cast_fp16)[name = string("x2_41")]; fp16 const_185_promoted = const()[name = string("const_185_promoted"), val = fp16(-0x1p+0)]; tensor var_5838 = mul(x = x2_41, y = const_185_promoted)[name = string("op_5838")]; int32 var_5840 = const()[name = string("op_5840"), val = int32(-1)]; bool var_5841_interleave_0 = const()[name = string("op_5841_interleave_0"), val = bool(false)]; tensor var_5841 = concat(axis = var_5840, interleave = var_5841_interleave_0, values = (var_5838, x1_41))[name = string("op_5841")]; tensor var_5842 = mul(x = var_5841, y = sin_1_cast_fp16)[name = string("op_5842")]; tensor query_states_41 = add(x = var_5817, y = var_5842)[name = string("query_states_41")]; tensor var_5845 = mul(x = var_5814_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5845")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_5814_cast_fp16)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_5814_cast_fp16)[name = string("x2_43")]; fp16 const_188_promoted = const()[name = string("const_188_promoted"), val = fp16(-0x1p+0)]; tensor var_5866 = mul(x = x2_43, y = const_188_promoted)[name = string("op_5866")]; int32 var_5868 = const()[name = string("op_5868"), val = int32(-1)]; bool var_5869_interleave_0 = const()[name = string("op_5869_interleave_0"), val = bool(false)]; tensor var_5869 = concat(axis = var_5868, interleave = var_5869_interleave_0, values = (var_5866, x1_43))[name = string("op_5869")]; tensor var_5870 = mul(x = var_5869, y = sin_1_cast_fp16)[name = string("op_5870")]; tensor key_states_41 = add(x = var_5845, y = var_5870)[name = string("key_states_41")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([28])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([29])]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_82")]; tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_124, concat_83_values1_0, var_1195, concat_83_values3_0))[name = string("concat_83")]; tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = key_states_41, x = coreml_update_state_55)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([64])]; tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([65])]; int32 concat_86_axis_0 = const()[name = string("concat_86_axis_0"), val = int32(0)]; bool concat_86_interleave_0 = const()[name = string("concat_86_interleave_0"), val = bool(false)]; tensor concat_86 = concat(axis = concat_86_axis_0, interleave = concat_86_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_86")]; tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (expand_dims_130, concat_87_values1_0, var_1195, concat_87_values3_0))[name = string("concat_87")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = var_5775, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; tensor var_5925_begin_0 = const()[name = string("op_5925_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_5925_end_0 = const()[name = string("op_5925_end_0"), val = tensor([29, 8, 1024, 128])]; tensor var_5925_end_mask_0 = const()[name = string("op_5925_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5925_cast_fp16 = slice_by_index(begin = var_5925_begin_0, end = var_5925_end_0, end_mask = var_5925_end_mask_0, x = coreml_update_state_57)[name = string("op_5925_cast_fp16")]; tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_5925_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; tensor var_5932_begin_0 = const()[name = string("op_5932_begin_0"), val = tensor([64, 0, 0, 0])]; tensor var_5932_end_0 = const()[name = string("op_5932_end_0"), val = tensor([65, 8, 1024, 128])]; tensor var_5932_end_mask_0 = const()[name = string("op_5932_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5932_cast_fp16 = slice_by_index(begin = var_5932_begin_0, end = var_5932_end_0, end_mask = var_5932_end_mask_0, x = coreml_update_state_57)[name = string("op_5932_cast_fp16")]; tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_5932_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_5969 = const()[name = string("op_5969"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_5969, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_5981 = const()[name = string("op_5981"), val = tensor([1, -1, 1024, 128])]; tensor key_states_43_cast_fp16 = reshape(shape = var_5981, x = x_209_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_5989 = const()[name = string("op_5989"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_5989, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; tensor var_6001 = const()[name = string("op_6001"), val = tensor([1, -1, 1024, 128])]; tensor value_states_63_cast_fp16 = reshape(shape = var_6001, x = x_215_cast_fp16)[name = string("value_states_63_cast_fp16")]; bool var_6016_transpose_x_1 = const()[name = string("op_6016_transpose_x_1"), val = bool(false)]; bool var_6016_transpose_y_1 = const()[name = string("op_6016_transpose_y_1"), val = bool(true)]; tensor var_6016 = matmul(transpose_x = var_6016_transpose_x_1, transpose_y = var_6016_transpose_y_1, x = query_states_41, y = key_states_43_cast_fp16)[name = string("op_6016")]; fp16 var_6017_to_fp16 = const()[name = string("op_6017_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_6016, y = var_6017_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_6052 = const()[name = string("op_6052"), val = int32(-1)]; tensor attn_weights_65_cast_fp16 = softmax(axis = var_6052, x = attn_weights_63_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = attn_weights_65_cast_fp16, y = value_states_63_cast_fp16)[name = string("attn_output_101_cast_fp16")]; tensor var_6063_perm_0 = const()[name = string("op_6063_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6067 = const()[name = string("op_6067"), val = tensor([1, 1, 4096])]; tensor var_6063_cast_fp16 = transpose(perm = var_6063_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_46")]; tensor attn_output_105_cast_fp16 = reshape(shape = var_6067, x = var_6063_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_6072 = const()[name = string("op_6072"), val = tensor([0, 2, 1])]; string var_6088_pad_type_0 = const()[name = string("op_6088_pad_type_0"), val = string("valid")]; int32 var_6088_groups_0 = const()[name = string("op_6088_groups_0"), val = int32(1)]; tensor var_6088_strides_0 = const()[name = string("op_6088_strides_0"), val = tensor([1])]; tensor var_6088_pad_0 = const()[name = string("op_6088_pad_0"), val = tensor([0, 0])]; tensor var_6088_dilations_0 = const()[name = string("op_6088_dilations_0"), val = tensor([1])]; tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902201472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907444416))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6073_cast_fp16 = transpose(perm = var_6072, x = attn_output_105_cast_fp16)[name = string("transpose_45")]; tensor var_6088_cast_fp16 = conv(dilations = var_6088_dilations_0, groups = var_6088_groups_0, pad = var_6088_pad_0, pad_type = var_6088_pad_type_0, strides = var_6088_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_6073_cast_fp16)[name = string("op_6088_cast_fp16")]; tensor var_6092 = const()[name = string("op_6092"), val = tensor([0, 2, 1])]; tensor attn_output_109_cast_fp16 = transpose(perm = var_6092, x = var_6088_cast_fp16)[name = string("transpose_44")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor mean_87_axes_0 = const()[name = string("mean_87_axes_0"), val = tensor([-1])]; bool mean_87_keep_dims_0 = const()[name = string("mean_87_keep_dims_0"), val = bool(true)]; tensor mean_87_cast_fp16 = reduce_mean(axes = mean_87_axes_0, keep_dims = mean_87_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_87_cast_fp16")]; tensor input_191_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_87_cast_fp16)[name = string("input_191_cast_fp16")]; tensor var_6111_axes_0 = const()[name = string("op_6111_axes_0"), val = tensor([-1])]; tensor model_model_layers_28_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_28_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907454720)))]; fp16 var_6099_to_fp16 = const()[name = string("op_6099_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6111_cast_fp16 = layer_norm(axes = var_6111_axes_0, epsilon = var_6099_to_fp16, gamma = model_model_layers_28_post_attention_layernorm_weight_to_fp16, x = input_191_cast_fp16)[name = string("op_6111_cast_fp16")]; tensor var_6125 = const()[name = string("op_6125"), val = tensor([0, 2, 1])]; tensor input_193_axes_0 = const()[name = string("input_193_axes_0"), val = tensor([2])]; tensor var_6126 = transpose(perm = var_6125, x = var_6111_cast_fp16)[name = string("transpose_43")]; tensor input_193 = expand_dims(axes = input_193_axes_0, x = var_6126)[name = string("input_193")]; string input_195_pad_type_0 = const()[name = string("input_195_pad_type_0"), val = string("valid")]; tensor input_195_strides_0 = const()[name = string("input_195_strides_0"), val = tensor([1, 1])]; tensor input_195_pad_0 = const()[name = string("input_195_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_195_dilations_0 = const()[name = string("input_195_dilations_0"), val = tensor([1, 1])]; int32 input_195_groups_0 = const()[name = string("input_195_groups_0"), val = int32(1)]; tensor input_195 = conv(dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = model_model_layers_28_mlp_gate_proj_weight_palettized, x = input_193)[name = string("input_195")]; string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_28_mlp_up_proj_weight_palettized, x = input_193)[name = string("b_21")]; tensor c_21 = silu(x = input_195)[name = string("c_21")]; tensor input_197 = mul(x = c_21, y = b_21)[name = string("input_197")]; string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; tensor e_21 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_28_mlp_down_proj_weight_palettized, x = input_197)[name = string("e_21")]; tensor var_6148_axes_0 = const()[name = string("op_6148_axes_0"), val = tensor([2])]; tensor var_6148 = squeeze(axes = var_6148_axes_0, x = e_21)[name = string("op_6148")]; tensor var_6149 = const()[name = string("op_6149"), val = tensor([0, 2, 1])]; tensor var_6150 = transpose(perm = var_6149, x = var_6148)[name = string("transpose_42")]; tensor hidden_states_67_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = var_6150)[name = string("hidden_states_67_cast_fp16")]; tensor mean_89_axes_0 = const()[name = string("mean_89_axes_0"), val = tensor([-1])]; bool mean_89_keep_dims_0 = const()[name = string("mean_89_keep_dims_0"), val = bool(true)]; tensor mean_89_cast_fp16 = reduce_mean(axes = mean_89_axes_0, keep_dims = mean_89_keep_dims_0, x = hidden_states_67_cast_fp16)[name = string("mean_89_cast_fp16")]; tensor input_199_cast_fp16 = sub(x = hidden_states_67_cast_fp16, y = mean_89_cast_fp16)[name = string("input_199_cast_fp16")]; tensor var_6168_axes_0 = const()[name = string("op_6168_axes_0"), val = tensor([-1])]; tensor model_model_layers_29_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_29_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907459904)))]; fp16 var_6156_to_fp16 = const()[name = string("op_6156_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6168_cast_fp16 = layer_norm(axes = var_6168_axes_0, epsilon = var_6156_to_fp16, gamma = model_model_layers_29_input_layernorm_weight_to_fp16, x = input_199_cast_fp16)[name = string("op_6168_cast_fp16")]; tensor var_6174 = const()[name = string("op_6174"), val = tensor([0, 2, 1])]; tensor var_6177_axes_0 = const()[name = string("op_6177_axes_0"), val = tensor([2])]; tensor var_6175 = transpose(perm = var_6174, x = var_6168_cast_fp16)[name = string("transpose_41")]; tensor var_6177 = expand_dims(axes = var_6177_axes_0, x = var_6175)[name = string("op_6177")]; string var_6193_pad_type_0 = const()[name = string("op_6193_pad_type_0"), val = string("valid")]; tensor var_6193_strides_0 = const()[name = string("op_6193_strides_0"), val = tensor([1, 1])]; tensor var_6193_pad_0 = const()[name = string("op_6193_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6193_dilations_0 = const()[name = string("op_6193_dilations_0"), val = tensor([1, 1])]; int32 var_6193_groups_0 = const()[name = string("op_6193_groups_0"), val = int32(1)]; tensor var_6193 = conv(dilations = var_6193_dilations_0, groups = var_6193_groups_0, pad = var_6193_pad_0, pad_type = var_6193_pad_type_0, strides = var_6193_strides_0, weight = model_model_layers_29_self_attn_q_proj_weight_palettized, x = var_6177)[name = string("op_6193")]; tensor var_6198 = const()[name = string("op_6198"), val = tensor([1, 32, 1, 128])]; tensor var_6199 = reshape(shape = var_6198, x = var_6193)[name = string("op_6199")]; string var_6215_pad_type_0 = const()[name = string("op_6215_pad_type_0"), val = string("valid")]; tensor var_6215_strides_0 = const()[name = string("op_6215_strides_0"), val = tensor([1, 1])]; tensor var_6215_pad_0 = const()[name = string("op_6215_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6215_dilations_0 = const()[name = string("op_6215_dilations_0"), val = tensor([1, 1])]; int32 var_6215_groups_0 = const()[name = string("op_6215_groups_0"), val = int32(1)]; tensor var_6215 = conv(dilations = var_6215_dilations_0, groups = var_6215_groups_0, pad = var_6215_pad_0, pad_type = var_6215_pad_type_0, strides = var_6215_strides_0, weight = model_model_layers_29_self_attn_k_proj_weight_palettized, x = var_6177)[name = string("op_6215")]; tensor var_6220 = const()[name = string("op_6220"), val = tensor([1, 8, 1, 128])]; tensor var_6221 = reshape(shape = var_6220, x = var_6215)[name = string("op_6221")]; string var_6237_pad_type_0 = const()[name = string("op_6237_pad_type_0"), val = string("valid")]; tensor var_6237_strides_0 = const()[name = string("op_6237_strides_0"), val = tensor([1, 1])]; tensor var_6237_pad_0 = const()[name = string("op_6237_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6237_dilations_0 = const()[name = string("op_6237_dilations_0"), val = tensor([1, 1])]; int32 var_6237_groups_0 = const()[name = string("op_6237_groups_0"), val = int32(1)]; tensor var_6237 = conv(dilations = var_6237_dilations_0, groups = var_6237_groups_0, pad = var_6237_pad_0, pad_type = var_6237_pad_type_0, strides = var_6237_strides_0, weight = model_model_layers_29_self_attn_v_proj_weight_palettized, x = var_6177)[name = string("op_6237")]; tensor var_6242 = const()[name = string("op_6242"), val = tensor([1, 8, 1, 128])]; tensor var_6243 = reshape(shape = var_6242, x = var_6237)[name = string("op_6243")]; tensor mean_91_axes_0 = const()[name = string("mean_91_axes_0"), val = tensor([-1])]; bool mean_91_keep_dims_0 = const()[name = string("mean_91_keep_dims_0"), val = bool(true)]; tensor mean_91 = reduce_mean(axes = mean_91_axes_0, keep_dims = mean_91_keep_dims_0, x = var_6199)[name = string("mean_91")]; tensor input_203 = sub(x = var_6199, y = mean_91)[name = string("input_203")]; tensor var_6264_axes_0 = const()[name = string("op_6264_axes_0"), val = tensor([-1])]; tensor model_model_layers_29_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_29_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465088)))]; fp16 var_6252_to_fp16 = const()[name = string("op_6252_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6264_cast_fp16 = layer_norm(axes = var_6264_axes_0, epsilon = var_6252_to_fp16, gamma = model_model_layers_29_self_attn_q_norm_weight_to_fp16, x = input_203)[name = string("op_6264_cast_fp16")]; tensor mean_93_axes_0 = const()[name = string("mean_93_axes_0"), val = tensor([-1])]; bool mean_93_keep_dims_0 = const()[name = string("mean_93_keep_dims_0"), val = bool(true)]; tensor mean_93 = reduce_mean(axes = mean_93_axes_0, keep_dims = mean_93_keep_dims_0, x = var_6221)[name = string("mean_93")]; tensor input_205 = sub(x = var_6221, y = mean_93)[name = string("input_205")]; tensor var_6282_axes_0 = const()[name = string("op_6282_axes_0"), val = tensor([-1])]; tensor model_model_layers_29_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_29_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465408)))]; fp16 var_6270_to_fp16 = const()[name = string("op_6270_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6282_cast_fp16 = layer_norm(axes = var_6282_axes_0, epsilon = var_6270_to_fp16, gamma = model_model_layers_29_self_attn_k_norm_weight_to_fp16, x = input_205)[name = string("op_6282_cast_fp16")]; tensor var_6285 = mul(x = var_6264_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6285")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_6264_cast_fp16)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_6264_cast_fp16)[name = string("x2_45")]; fp16 const_203_promoted = const()[name = string("const_203_promoted"), val = fp16(-0x1p+0)]; tensor var_6306 = mul(x = x2_45, y = const_203_promoted)[name = string("op_6306")]; int32 var_6308 = const()[name = string("op_6308"), val = int32(-1)]; bool var_6309_interleave_0 = const()[name = string("op_6309_interleave_0"), val = bool(false)]; tensor var_6309 = concat(axis = var_6308, interleave = var_6309_interleave_0, values = (var_6306, x1_45))[name = string("op_6309")]; tensor var_6310 = mul(x = var_6309, y = sin_1_cast_fp16)[name = string("op_6310")]; tensor query_states_45 = add(x = var_6285, y = var_6310)[name = string("query_states_45")]; tensor var_6313 = mul(x = var_6282_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6313")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_6282_cast_fp16)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_6282_cast_fp16)[name = string("x2_47")]; fp16 const_206_promoted = const()[name = string("const_206_promoted"), val = fp16(-0x1p+0)]; tensor var_6334 = mul(x = x2_47, y = const_206_promoted)[name = string("op_6334")]; int32 var_6336 = const()[name = string("op_6336"), val = int32(-1)]; bool var_6337_interleave_0 = const()[name = string("op_6337_interleave_0"), val = bool(false)]; tensor var_6337 = concat(axis = var_6336, interleave = var_6337_interleave_0, values = (var_6334, x1_47))[name = string("op_6337")]; tensor var_6338 = mul(x = var_6337, y = sin_1_cast_fp16)[name = string("op_6338")]; tensor key_states_45 = add(x = var_6313, y = var_6338)[name = string("key_states_45")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([29])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([30])]; int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_90")]; tensor concat_91_values1_0 = const()[name = string("concat_91_values1_0"), val = tensor([0])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_136, concat_91_values1_0, var_1195, concat_91_values3_0))[name = string("concat_91")]; tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_90, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_91, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = key_states_45, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([65])]; tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([66])]; int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_94")]; tensor concat_95_values1_0 = const()[name = string("concat_95_values1_0"), val = tensor([0])]; tensor concat_95_values3_0 = const()[name = string("concat_95_values3_0"), val = tensor([0])]; int32 concat_95_axis_0 = const()[name = string("concat_95_axis_0"), val = int32(0)]; bool concat_95_interleave_0 = const()[name = string("concat_95_interleave_0"), val = bool(false)]; tensor concat_95 = concat(axis = concat_95_axis_0, interleave = concat_95_interleave_0, values = (expand_dims_142, concat_95_values1_0, var_1195, concat_95_values3_0))[name = string("concat_95")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_94, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_95, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = var_6243, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; tensor var_6393_begin_0 = const()[name = string("op_6393_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_6393_end_0 = const()[name = string("op_6393_end_0"), val = tensor([30, 8, 1024, 128])]; tensor var_6393_end_mask_0 = const()[name = string("op_6393_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6393_cast_fp16 = slice_by_index(begin = var_6393_begin_0, end = var_6393_end_0, end_mask = var_6393_end_mask_0, x = coreml_update_state_59)[name = string("op_6393_cast_fp16")]; tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_6393_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; tensor var_6400_begin_0 = const()[name = string("op_6400_begin_0"), val = tensor([65, 0, 0, 0])]; tensor var_6400_end_0 = const()[name = string("op_6400_end_0"), val = tensor([66, 8, 1024, 128])]; tensor var_6400_end_mask_0 = const()[name = string("op_6400_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6400_cast_fp16 = slice_by_index(begin = var_6400_begin_0, end = var_6400_end_0, end_mask = var_6400_end_mask_0, x = coreml_update_state_59)[name = string("op_6400_cast_fp16")]; tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_6400_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; tensor x_227_axes_0 = const()[name = string("x_227_axes_0"), val = tensor([1])]; tensor x_227_cast_fp16 = expand_dims(axes = x_227_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_227_cast_fp16")]; tensor var_6437 = const()[name = string("op_6437"), val = tensor([1, 4, 1, 1])]; tensor x_229_cast_fp16 = tile(reps = var_6437, x = x_227_cast_fp16)[name = string("x_229_cast_fp16")]; tensor var_6449 = const()[name = string("op_6449"), val = tensor([1, -1, 1024, 128])]; tensor key_states_47_cast_fp16 = reshape(shape = var_6449, x = x_229_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor x_233_axes_0 = const()[name = string("x_233_axes_0"), val = tensor([1])]; tensor x_233_cast_fp16 = expand_dims(axes = x_233_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_233_cast_fp16")]; tensor var_6457 = const()[name = string("op_6457"), val = tensor([1, 4, 1, 1])]; tensor x_235_cast_fp16 = tile(reps = var_6457, x = x_233_cast_fp16)[name = string("x_235_cast_fp16")]; tensor var_6469 = const()[name = string("op_6469"), val = tensor([1, -1, 1024, 128])]; tensor value_states_69_cast_fp16 = reshape(shape = var_6469, x = x_235_cast_fp16)[name = string("value_states_69_cast_fp16")]; bool var_6484_transpose_x_1 = const()[name = string("op_6484_transpose_x_1"), val = bool(false)]; bool var_6484_transpose_y_1 = const()[name = string("op_6484_transpose_y_1"), val = bool(true)]; tensor var_6484 = matmul(transpose_x = var_6484_transpose_x_1, transpose_y = var_6484_transpose_y_1, x = query_states_45, y = key_states_47_cast_fp16)[name = string("op_6484")]; fp16 var_6485_to_fp16 = const()[name = string("op_6485_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_67_cast_fp16 = mul(x = var_6484, y = var_6485_to_fp16)[name = string("attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; int32 var_6520 = const()[name = string("op_6520"), val = int32(-1)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_6520, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool attn_output_111_transpose_x_0 = const()[name = string("attn_output_111_transpose_x_0"), val = bool(false)]; bool attn_output_111_transpose_y_0 = const()[name = string("attn_output_111_transpose_y_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = matmul(transpose_x = attn_output_111_transpose_x_0, transpose_y = attn_output_111_transpose_y_0, x = attn_weights_71_cast_fp16, y = value_states_69_cast_fp16)[name = string("attn_output_111_cast_fp16")]; tensor var_6531_perm_0 = const()[name = string("op_6531_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6535 = const()[name = string("op_6535"), val = tensor([1, 1, 4096])]; tensor var_6531_cast_fp16 = transpose(perm = var_6531_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_40")]; tensor attn_output_115_cast_fp16 = reshape(shape = var_6535, x = var_6531_cast_fp16)[name = string("attn_output_115_cast_fp16")]; tensor var_6540 = const()[name = string("op_6540"), val = tensor([0, 2, 1])]; string var_6556_pad_type_0 = const()[name = string("op_6556_pad_type_0"), val = string("valid")]; int32 var_6556_groups_0 = const()[name = string("op_6556_groups_0"), val = int32(1)]; tensor var_6556_strides_0 = const()[name = string("op_6556_strides_0"), val = tensor([1])]; tensor var_6556_pad_0 = const()[name = string("op_6556_pad_0"), val = tensor([0, 0])]; tensor var_6556_dilations_0 = const()[name = string("op_6556_dilations_0"), val = tensor([1])]; tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912708672))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6541_cast_fp16 = transpose(perm = var_6540, x = attn_output_115_cast_fp16)[name = string("transpose_39")]; tensor var_6556_cast_fp16 = conv(dilations = var_6556_dilations_0, groups = var_6556_groups_0, pad = var_6556_pad_0, pad_type = var_6556_pad_type_0, strides = var_6556_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_6541_cast_fp16)[name = string("op_6556_cast_fp16")]; tensor var_6560 = const()[name = string("op_6560"), val = tensor([0, 2, 1])]; tensor attn_output_119_cast_fp16 = transpose(perm = var_6560, x = var_6556_cast_fp16)[name = string("transpose_38")]; tensor hidden_states_71_cast_fp16 = add(x = hidden_states_67_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor mean_95_axes_0 = const()[name = string("mean_95_axes_0"), val = tensor([-1])]; bool mean_95_keep_dims_0 = const()[name = string("mean_95_keep_dims_0"), val = bool(true)]; tensor mean_95_cast_fp16 = reduce_mean(axes = mean_95_axes_0, keep_dims = mean_95_keep_dims_0, x = hidden_states_71_cast_fp16)[name = string("mean_95_cast_fp16")]; tensor input_209_cast_fp16 = sub(x = hidden_states_71_cast_fp16, y = mean_95_cast_fp16)[name = string("input_209_cast_fp16")]; tensor var_6579_axes_0 = const()[name = string("op_6579_axes_0"), val = tensor([-1])]; tensor model_model_layers_29_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_29_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912718976)))]; fp16 var_6567_to_fp16 = const()[name = string("op_6567_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6579_cast_fp16 = layer_norm(axes = var_6579_axes_0, epsilon = var_6567_to_fp16, gamma = model_model_layers_29_post_attention_layernorm_weight_to_fp16, x = input_209_cast_fp16)[name = string("op_6579_cast_fp16")]; tensor var_6593 = const()[name = string("op_6593"), val = tensor([0, 2, 1])]; tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; tensor var_6594 = transpose(perm = var_6593, x = var_6579_cast_fp16)[name = string("transpose_37")]; tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_6594)[name = string("input_211")]; string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = model_model_layers_29_mlp_gate_proj_weight_palettized, x = input_211)[name = string("input_213")]; string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_29_mlp_up_proj_weight_palettized, x = input_211)[name = string("b_23")]; tensor c_23 = silu(x = input_213)[name = string("c_23")]; tensor input_215 = mul(x = c_23, y = b_23)[name = string("input_215")]; string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; tensor e_23 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_29_mlp_down_proj_weight_palettized, x = input_215)[name = string("e_23")]; tensor var_6616_axes_0 = const()[name = string("op_6616_axes_0"), val = tensor([2])]; tensor var_6616 = squeeze(axes = var_6616_axes_0, x = e_23)[name = string("op_6616")]; tensor var_6617 = const()[name = string("op_6617"), val = tensor([0, 2, 1])]; tensor var_6618 = transpose(perm = var_6617, x = var_6616)[name = string("transpose_36")]; tensor hidden_states_73_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = var_6618)[name = string("hidden_states_73_cast_fp16")]; tensor mean_97_axes_0 = const()[name = string("mean_97_axes_0"), val = tensor([-1])]; bool mean_97_keep_dims_0 = const()[name = string("mean_97_keep_dims_0"), val = bool(true)]; tensor mean_97_cast_fp16 = reduce_mean(axes = mean_97_axes_0, keep_dims = mean_97_keep_dims_0, x = hidden_states_73_cast_fp16)[name = string("mean_97_cast_fp16")]; tensor input_217_cast_fp16 = sub(x = hidden_states_73_cast_fp16, y = mean_97_cast_fp16)[name = string("input_217_cast_fp16")]; tensor var_6636_axes_0 = const()[name = string("op_6636_axes_0"), val = tensor([-1])]; tensor model_model_layers_30_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_30_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912724160)))]; fp16 var_6624_to_fp16 = const()[name = string("op_6624_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6636_cast_fp16 = layer_norm(axes = var_6636_axes_0, epsilon = var_6624_to_fp16, gamma = model_model_layers_30_input_layernorm_weight_to_fp16, x = input_217_cast_fp16)[name = string("op_6636_cast_fp16")]; tensor var_6642 = const()[name = string("op_6642"), val = tensor([0, 2, 1])]; tensor var_6645_axes_0 = const()[name = string("op_6645_axes_0"), val = tensor([2])]; tensor var_6643 = transpose(perm = var_6642, x = var_6636_cast_fp16)[name = string("transpose_35")]; tensor var_6645 = expand_dims(axes = var_6645_axes_0, x = var_6643)[name = string("op_6645")]; string var_6661_pad_type_0 = const()[name = string("op_6661_pad_type_0"), val = string("valid")]; tensor var_6661_strides_0 = const()[name = string("op_6661_strides_0"), val = tensor([1, 1])]; tensor var_6661_pad_0 = const()[name = string("op_6661_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6661_dilations_0 = const()[name = string("op_6661_dilations_0"), val = tensor([1, 1])]; int32 var_6661_groups_0 = const()[name = string("op_6661_groups_0"), val = int32(1)]; tensor var_6661 = conv(dilations = var_6661_dilations_0, groups = var_6661_groups_0, pad = var_6661_pad_0, pad_type = var_6661_pad_type_0, strides = var_6661_strides_0, weight = model_model_layers_30_self_attn_q_proj_weight_palettized, x = var_6645)[name = string("op_6661")]; tensor var_6666 = const()[name = string("op_6666"), val = tensor([1, 32, 1, 128])]; tensor var_6667 = reshape(shape = var_6666, x = var_6661)[name = string("op_6667")]; string var_6683_pad_type_0 = const()[name = string("op_6683_pad_type_0"), val = string("valid")]; tensor var_6683_strides_0 = const()[name = string("op_6683_strides_0"), val = tensor([1, 1])]; tensor var_6683_pad_0 = const()[name = string("op_6683_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6683_dilations_0 = const()[name = string("op_6683_dilations_0"), val = tensor([1, 1])]; int32 var_6683_groups_0 = const()[name = string("op_6683_groups_0"), val = int32(1)]; tensor var_6683 = conv(dilations = var_6683_dilations_0, groups = var_6683_groups_0, pad = var_6683_pad_0, pad_type = var_6683_pad_type_0, strides = var_6683_strides_0, weight = model_model_layers_30_self_attn_k_proj_weight_palettized, x = var_6645)[name = string("op_6683")]; tensor var_6688 = const()[name = string("op_6688"), val = tensor([1, 8, 1, 128])]; tensor var_6689 = reshape(shape = var_6688, x = var_6683)[name = string("op_6689")]; string var_6705_pad_type_0 = const()[name = string("op_6705_pad_type_0"), val = string("valid")]; tensor var_6705_strides_0 = const()[name = string("op_6705_strides_0"), val = tensor([1, 1])]; tensor var_6705_pad_0 = const()[name = string("op_6705_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6705_dilations_0 = const()[name = string("op_6705_dilations_0"), val = tensor([1, 1])]; int32 var_6705_groups_0 = const()[name = string("op_6705_groups_0"), val = int32(1)]; tensor var_6705 = conv(dilations = var_6705_dilations_0, groups = var_6705_groups_0, pad = var_6705_pad_0, pad_type = var_6705_pad_type_0, strides = var_6705_strides_0, weight = model_model_layers_30_self_attn_v_proj_weight_palettized, x = var_6645)[name = string("op_6705")]; tensor var_6710 = const()[name = string("op_6710"), val = tensor([1, 8, 1, 128])]; tensor var_6711 = reshape(shape = var_6710, x = var_6705)[name = string("op_6711")]; tensor mean_99_axes_0 = const()[name = string("mean_99_axes_0"), val = tensor([-1])]; bool mean_99_keep_dims_0 = const()[name = string("mean_99_keep_dims_0"), val = bool(true)]; tensor mean_99 = reduce_mean(axes = mean_99_axes_0, keep_dims = mean_99_keep_dims_0, x = var_6667)[name = string("mean_99")]; tensor input_221 = sub(x = var_6667, y = mean_99)[name = string("input_221")]; tensor var_6732_axes_0 = const()[name = string("op_6732_axes_0"), val = tensor([-1])]; tensor model_model_layers_30_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_30_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729344)))]; fp16 var_6720_to_fp16 = const()[name = string("op_6720_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6732_cast_fp16 = layer_norm(axes = var_6732_axes_0, epsilon = var_6720_to_fp16, gamma = model_model_layers_30_self_attn_q_norm_weight_to_fp16, x = input_221)[name = string("op_6732_cast_fp16")]; tensor mean_101_axes_0 = const()[name = string("mean_101_axes_0"), val = tensor([-1])]; bool mean_101_keep_dims_0 = const()[name = string("mean_101_keep_dims_0"), val = bool(true)]; tensor mean_101 = reduce_mean(axes = mean_101_axes_0, keep_dims = mean_101_keep_dims_0, x = var_6689)[name = string("mean_101")]; tensor input_223 = sub(x = var_6689, y = mean_101)[name = string("input_223")]; tensor var_6750_axes_0 = const()[name = string("op_6750_axes_0"), val = tensor([-1])]; tensor model_model_layers_30_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_30_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729664)))]; fp16 var_6738_to_fp16 = const()[name = string("op_6738_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6750_cast_fp16 = layer_norm(axes = var_6750_axes_0, epsilon = var_6738_to_fp16, gamma = model_model_layers_30_self_attn_k_norm_weight_to_fp16, x = input_223)[name = string("op_6750_cast_fp16")]; tensor var_6753 = mul(x = var_6732_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6753")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_6732_cast_fp16)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_6732_cast_fp16)[name = string("x2_49")]; fp16 const_221_promoted = const()[name = string("const_221_promoted"), val = fp16(-0x1p+0)]; tensor var_6774 = mul(x = x2_49, y = const_221_promoted)[name = string("op_6774")]; int32 var_6776 = const()[name = string("op_6776"), val = int32(-1)]; bool var_6777_interleave_0 = const()[name = string("op_6777_interleave_0"), val = bool(false)]; tensor var_6777 = concat(axis = var_6776, interleave = var_6777_interleave_0, values = (var_6774, x1_49))[name = string("op_6777")]; tensor var_6778 = mul(x = var_6777, y = sin_1_cast_fp16)[name = string("op_6778")]; tensor query_states_49 = add(x = var_6753, y = var_6778)[name = string("query_states_49")]; tensor var_6781 = mul(x = var_6750_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6781")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_6750_cast_fp16)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_6750_cast_fp16)[name = string("x2_51")]; fp16 const_224_promoted = const()[name = string("const_224_promoted"), val = fp16(-0x1p+0)]; tensor var_6802 = mul(x = x2_51, y = const_224_promoted)[name = string("op_6802")]; int32 var_6804 = const()[name = string("op_6804"), val = int32(-1)]; bool var_6805_interleave_0 = const()[name = string("op_6805_interleave_0"), val = bool(false)]; tensor var_6805 = concat(axis = var_6804, interleave = var_6805_interleave_0, values = (var_6802, x1_51))[name = string("op_6805")]; tensor var_6806 = mul(x = var_6805, y = sin_1_cast_fp16)[name = string("op_6806")]; tensor key_states_49 = add(x = var_6781, y = var_6806)[name = string("key_states_49")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([30])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([31])]; int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_98")]; tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_148, concat_99_values1_0, var_1195, concat_99_values3_0))[name = string("concat_99")]; tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = key_states_49, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([66])]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([67])]; int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_102")]; tensor concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = tensor([0])]; tensor concat_103_values3_0 = const()[name = string("concat_103_values3_0"), val = tensor([0])]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (expand_dims_154, concat_103_values1_0, var_1195, concat_103_values3_0))[name = string("concat_103")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = var_6711, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; tensor var_6861_begin_0 = const()[name = string("op_6861_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_6861_end_0 = const()[name = string("op_6861_end_0"), val = tensor([31, 8, 1024, 128])]; tensor var_6861_end_mask_0 = const()[name = string("op_6861_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6861_cast_fp16 = slice_by_index(begin = var_6861_begin_0, end = var_6861_end_0, end_mask = var_6861_end_mask_0, x = coreml_update_state_61)[name = string("op_6861_cast_fp16")]; tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_6861_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; tensor var_6868_begin_0 = const()[name = string("op_6868_begin_0"), val = tensor([66, 0, 0, 0])]; tensor var_6868_end_0 = const()[name = string("op_6868_end_0"), val = tensor([67, 8, 1024, 128])]; tensor var_6868_end_mask_0 = const()[name = string("op_6868_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6868_cast_fp16 = slice_by_index(begin = var_6868_begin_0, end = var_6868_end_0, end_mask = var_6868_end_mask_0, x = coreml_update_state_61)[name = string("op_6868_cast_fp16")]; tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_6868_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; tensor x_247_axes_0 = const()[name = string("x_247_axes_0"), val = tensor([1])]; tensor x_247_cast_fp16 = expand_dims(axes = x_247_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_247_cast_fp16")]; tensor var_6905 = const()[name = string("op_6905"), val = tensor([1, 4, 1, 1])]; tensor x_249_cast_fp16 = tile(reps = var_6905, x = x_247_cast_fp16)[name = string("x_249_cast_fp16")]; tensor var_6917 = const()[name = string("op_6917"), val = tensor([1, -1, 1024, 128])]; tensor key_states_51_cast_fp16 = reshape(shape = var_6917, x = x_249_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor x_253_axes_0 = const()[name = string("x_253_axes_0"), val = tensor([1])]; tensor x_253_cast_fp16 = expand_dims(axes = x_253_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_253_cast_fp16")]; tensor var_6925 = const()[name = string("op_6925"), val = tensor([1, 4, 1, 1])]; tensor x_255_cast_fp16 = tile(reps = var_6925, x = x_253_cast_fp16)[name = string("x_255_cast_fp16")]; tensor var_6937 = const()[name = string("op_6937"), val = tensor([1, -1, 1024, 128])]; tensor value_states_75_cast_fp16 = reshape(shape = var_6937, x = x_255_cast_fp16)[name = string("value_states_75_cast_fp16")]; bool var_6952_transpose_x_1 = const()[name = string("op_6952_transpose_x_1"), val = bool(false)]; bool var_6952_transpose_y_1 = const()[name = string("op_6952_transpose_y_1"), val = bool(true)]; tensor var_6952 = matmul(transpose_x = var_6952_transpose_x_1, transpose_y = var_6952_transpose_y_1, x = query_states_49, y = key_states_51_cast_fp16)[name = string("op_6952")]; fp16 var_6953_to_fp16 = const()[name = string("op_6953_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_73_cast_fp16 = mul(x = var_6952, y = var_6953_to_fp16)[name = string("attn_weights_73_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; int32 var_6988 = const()[name = string("op_6988"), val = int32(-1)]; tensor attn_weights_77_cast_fp16 = softmax(axis = var_6988, x = attn_weights_75_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = attn_weights_77_cast_fp16, y = value_states_75_cast_fp16)[name = string("attn_output_121_cast_fp16")]; tensor var_6999_perm_0 = const()[name = string("op_6999_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7003 = const()[name = string("op_7003"), val = tensor([1, 1, 4096])]; tensor var_6999_cast_fp16 = transpose(perm = var_6999_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_34")]; tensor attn_output_125_cast_fp16 = reshape(shape = var_7003, x = var_6999_cast_fp16)[name = string("attn_output_125_cast_fp16")]; tensor var_7008 = const()[name = string("op_7008"), val = tensor([0, 2, 1])]; string var_7024_pad_type_0 = const()[name = string("op_7024_pad_type_0"), val = string("valid")]; int32 var_7024_groups_0 = const()[name = string("op_7024_groups_0"), val = int32(1)]; tensor var_7024_strides_0 = const()[name = string("op_7024_strides_0"), val = tensor([1])]; tensor var_7024_pad_0 = const()[name = string("op_7024_pad_0"), val = tensor([0, 0])]; tensor var_7024_dilations_0 = const()[name = string("op_7024_dilations_0"), val = tensor([1])]; tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917972928))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7009_cast_fp16 = transpose(perm = var_7008, x = attn_output_125_cast_fp16)[name = string("transpose_33")]; tensor var_7024_cast_fp16 = conv(dilations = var_7024_dilations_0, groups = var_7024_groups_0, pad = var_7024_pad_0, pad_type = var_7024_pad_type_0, strides = var_7024_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_7009_cast_fp16)[name = string("op_7024_cast_fp16")]; tensor var_7028 = const()[name = string("op_7028"), val = tensor([0, 2, 1])]; tensor attn_output_129_cast_fp16 = transpose(perm = var_7028, x = var_7024_cast_fp16)[name = string("transpose_32")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor mean_103_axes_0 = const()[name = string("mean_103_axes_0"), val = tensor([-1])]; bool mean_103_keep_dims_0 = const()[name = string("mean_103_keep_dims_0"), val = bool(true)]; tensor mean_103_cast_fp16 = reduce_mean(axes = mean_103_axes_0, keep_dims = mean_103_keep_dims_0, x = hidden_states_77_cast_fp16)[name = string("mean_103_cast_fp16")]; tensor input_227_cast_fp16 = sub(x = hidden_states_77_cast_fp16, y = mean_103_cast_fp16)[name = string("input_227_cast_fp16")]; tensor var_7047_axes_0 = const()[name = string("op_7047_axes_0"), val = tensor([-1])]; tensor model_model_layers_30_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_30_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917983232)))]; fp16 var_7035_to_fp16 = const()[name = string("op_7035_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7047_cast_fp16 = layer_norm(axes = var_7047_axes_0, epsilon = var_7035_to_fp16, gamma = model_model_layers_30_post_attention_layernorm_weight_to_fp16, x = input_227_cast_fp16)[name = string("op_7047_cast_fp16")]; tensor var_7061 = const()[name = string("op_7061"), val = tensor([0, 2, 1])]; tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; tensor var_7062 = transpose(perm = var_7061, x = var_7047_cast_fp16)[name = string("transpose_31")]; tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_7062)[name = string("input_229")]; string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = model_model_layers_30_mlp_gate_proj_weight_palettized, x = input_229)[name = string("input_231")]; string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_30_mlp_up_proj_weight_palettized, x = input_229)[name = string("b_25")]; tensor c_25 = silu(x = input_231)[name = string("c_25")]; tensor input_233 = mul(x = c_25, y = b_25)[name = string("input_233")]; string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; tensor e_25 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_30_mlp_down_proj_weight_palettized, x = input_233)[name = string("e_25")]; tensor var_7084_axes_0 = const()[name = string("op_7084_axes_0"), val = tensor([2])]; tensor var_7084 = squeeze(axes = var_7084_axes_0, x = e_25)[name = string("op_7084")]; tensor var_7085 = const()[name = string("op_7085"), val = tensor([0, 2, 1])]; tensor var_7086 = transpose(perm = var_7085, x = var_7084)[name = string("transpose_30")]; tensor hidden_states_79_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = var_7086)[name = string("hidden_states_79_cast_fp16")]; tensor mean_105_axes_0 = const()[name = string("mean_105_axes_0"), val = tensor([-1])]; bool mean_105_keep_dims_0 = const()[name = string("mean_105_keep_dims_0"), val = bool(true)]; tensor mean_105_cast_fp16 = reduce_mean(axes = mean_105_axes_0, keep_dims = mean_105_keep_dims_0, x = hidden_states_79_cast_fp16)[name = string("mean_105_cast_fp16")]; tensor input_235_cast_fp16 = sub(x = hidden_states_79_cast_fp16, y = mean_105_cast_fp16)[name = string("input_235_cast_fp16")]; tensor var_7104_axes_0 = const()[name = string("op_7104_axes_0"), val = tensor([-1])]; tensor model_model_layers_31_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_31_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917988416)))]; fp16 var_7092_to_fp16 = const()[name = string("op_7092_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7104_cast_fp16 = layer_norm(axes = var_7104_axes_0, epsilon = var_7092_to_fp16, gamma = model_model_layers_31_input_layernorm_weight_to_fp16, x = input_235_cast_fp16)[name = string("op_7104_cast_fp16")]; tensor var_7110 = const()[name = string("op_7110"), val = tensor([0, 2, 1])]; tensor var_7113_axes_0 = const()[name = string("op_7113_axes_0"), val = tensor([2])]; tensor var_7111 = transpose(perm = var_7110, x = var_7104_cast_fp16)[name = string("transpose_29")]; tensor var_7113 = expand_dims(axes = var_7113_axes_0, x = var_7111)[name = string("op_7113")]; string var_7129_pad_type_0 = const()[name = string("op_7129_pad_type_0"), val = string("valid")]; tensor var_7129_strides_0 = const()[name = string("op_7129_strides_0"), val = tensor([1, 1])]; tensor var_7129_pad_0 = const()[name = string("op_7129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7129_dilations_0 = const()[name = string("op_7129_dilations_0"), val = tensor([1, 1])]; int32 var_7129_groups_0 = const()[name = string("op_7129_groups_0"), val = int32(1)]; tensor var_7129 = conv(dilations = var_7129_dilations_0, groups = var_7129_groups_0, pad = var_7129_pad_0, pad_type = var_7129_pad_type_0, strides = var_7129_strides_0, weight = model_model_layers_31_self_attn_q_proj_weight_palettized, x = var_7113)[name = string("op_7129")]; tensor var_7134 = const()[name = string("op_7134"), val = tensor([1, 32, 1, 128])]; tensor var_7135 = reshape(shape = var_7134, x = var_7129)[name = string("op_7135")]; string var_7151_pad_type_0 = const()[name = string("op_7151_pad_type_0"), val = string("valid")]; tensor var_7151_strides_0 = const()[name = string("op_7151_strides_0"), val = tensor([1, 1])]; tensor var_7151_pad_0 = const()[name = string("op_7151_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7151_dilations_0 = const()[name = string("op_7151_dilations_0"), val = tensor([1, 1])]; int32 var_7151_groups_0 = const()[name = string("op_7151_groups_0"), val = int32(1)]; tensor var_7151 = conv(dilations = var_7151_dilations_0, groups = var_7151_groups_0, pad = var_7151_pad_0, pad_type = var_7151_pad_type_0, strides = var_7151_strides_0, weight = model_model_layers_31_self_attn_k_proj_weight_palettized, x = var_7113)[name = string("op_7151")]; tensor var_7156 = const()[name = string("op_7156"), val = tensor([1, 8, 1, 128])]; tensor var_7157 = reshape(shape = var_7156, x = var_7151)[name = string("op_7157")]; string var_7173_pad_type_0 = const()[name = string("op_7173_pad_type_0"), val = string("valid")]; tensor var_7173_strides_0 = const()[name = string("op_7173_strides_0"), val = tensor([1, 1])]; tensor var_7173_pad_0 = const()[name = string("op_7173_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7173_dilations_0 = const()[name = string("op_7173_dilations_0"), val = tensor([1, 1])]; int32 var_7173_groups_0 = const()[name = string("op_7173_groups_0"), val = int32(1)]; tensor var_7173 = conv(dilations = var_7173_dilations_0, groups = var_7173_groups_0, pad = var_7173_pad_0, pad_type = var_7173_pad_type_0, strides = var_7173_strides_0, weight = model_model_layers_31_self_attn_v_proj_weight_palettized, x = var_7113)[name = string("op_7173")]; tensor var_7178 = const()[name = string("op_7178"), val = tensor([1, 8, 1, 128])]; tensor var_7179 = reshape(shape = var_7178, x = var_7173)[name = string("op_7179")]; tensor mean_107_axes_0 = const()[name = string("mean_107_axes_0"), val = tensor([-1])]; bool mean_107_keep_dims_0 = const()[name = string("mean_107_keep_dims_0"), val = bool(true)]; tensor mean_107 = reduce_mean(axes = mean_107_axes_0, keep_dims = mean_107_keep_dims_0, x = var_7135)[name = string("mean_107")]; tensor input_239 = sub(x = var_7135, y = mean_107)[name = string("input_239")]; tensor var_7200_axes_0 = const()[name = string("op_7200_axes_0"), val = tensor([-1])]; tensor model_model_layers_31_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_31_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917993600)))]; fp16 var_7188_to_fp16 = const()[name = string("op_7188_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7200_cast_fp16 = layer_norm(axes = var_7200_axes_0, epsilon = var_7188_to_fp16, gamma = model_model_layers_31_self_attn_q_norm_weight_to_fp16, x = input_239)[name = string("op_7200_cast_fp16")]; tensor mean_109_axes_0 = const()[name = string("mean_109_axes_0"), val = tensor([-1])]; bool mean_109_keep_dims_0 = const()[name = string("mean_109_keep_dims_0"), val = bool(true)]; tensor mean_109 = reduce_mean(axes = mean_109_axes_0, keep_dims = mean_109_keep_dims_0, x = var_7157)[name = string("mean_109")]; tensor input_241 = sub(x = var_7157, y = mean_109)[name = string("input_241")]; tensor var_7218_axes_0 = const()[name = string("op_7218_axes_0"), val = tensor([-1])]; tensor model_model_layers_31_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_31_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917993920)))]; fp16 var_7206_to_fp16 = const()[name = string("op_7206_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7218_cast_fp16 = layer_norm(axes = var_7218_axes_0, epsilon = var_7206_to_fp16, gamma = model_model_layers_31_self_attn_k_norm_weight_to_fp16, x = input_241)[name = string("op_7218_cast_fp16")]; tensor var_7221 = mul(x = var_7200_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7221")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_7200_cast_fp16)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_7200_cast_fp16)[name = string("x2_53")]; fp16 const_239_promoted = const()[name = string("const_239_promoted"), val = fp16(-0x1p+0)]; tensor var_7242 = mul(x = x2_53, y = const_239_promoted)[name = string("op_7242")]; int32 var_7244 = const()[name = string("op_7244"), val = int32(-1)]; bool var_7245_interleave_0 = const()[name = string("op_7245_interleave_0"), val = bool(false)]; tensor var_7245 = concat(axis = var_7244, interleave = var_7245_interleave_0, values = (var_7242, x1_53))[name = string("op_7245")]; tensor var_7246 = mul(x = var_7245, y = sin_1_cast_fp16)[name = string("op_7246")]; tensor query_states_53 = add(x = var_7221, y = var_7246)[name = string("query_states_53")]; tensor var_7249 = mul(x = var_7218_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7249")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_7218_cast_fp16)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_7218_cast_fp16)[name = string("x2_55")]; fp16 const_242_promoted = const()[name = string("const_242_promoted"), val = fp16(-0x1p+0)]; tensor var_7270 = mul(x = x2_55, y = const_242_promoted)[name = string("op_7270")]; int32 var_7272 = const()[name = string("op_7272"), val = int32(-1)]; bool var_7273_interleave_0 = const()[name = string("op_7273_interleave_0"), val = bool(false)]; tensor var_7273 = concat(axis = var_7272, interleave = var_7273_interleave_0, values = (var_7270, x1_55))[name = string("op_7273")]; tensor var_7274 = mul(x = var_7273, y = sin_1_cast_fp16)[name = string("op_7274")]; tensor key_states_53 = add(x = var_7249, y = var_7274)[name = string("key_states_53")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([31])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([32])]; int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_106")]; tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_160, concat_107_values1_0, var_1195, concat_107_values3_0))[name = string("concat_107")]; tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = key_states_53, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([67])]; tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([68])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_166, concat_111_values1_0, var_1195, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = var_7179, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; tensor var_7329_begin_0 = const()[name = string("op_7329_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_7329_end_0 = const()[name = string("op_7329_end_0"), val = tensor([32, 8, 1024, 128])]; tensor var_7329_end_mask_0 = const()[name = string("op_7329_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7329_cast_fp16 = slice_by_index(begin = var_7329_begin_0, end = var_7329_end_0, end_mask = var_7329_end_mask_0, x = coreml_update_state_63)[name = string("op_7329_cast_fp16")]; tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_7329_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; tensor var_7336_begin_0 = const()[name = string("op_7336_begin_0"), val = tensor([67, 0, 0, 0])]; tensor var_7336_end_0 = const()[name = string("op_7336_end_0"), val = tensor([68, 8, 1024, 128])]; tensor var_7336_end_mask_0 = const()[name = string("op_7336_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7336_cast_fp16 = slice_by_index(begin = var_7336_begin_0, end = var_7336_end_0, end_mask = var_7336_end_mask_0, x = coreml_update_state_63)[name = string("op_7336_cast_fp16")]; tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_7336_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; tensor x_267_axes_0 = const()[name = string("x_267_axes_0"), val = tensor([1])]; tensor x_267_cast_fp16 = expand_dims(axes = x_267_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_267_cast_fp16")]; tensor var_7373 = const()[name = string("op_7373"), val = tensor([1, 4, 1, 1])]; tensor x_269_cast_fp16 = tile(reps = var_7373, x = x_267_cast_fp16)[name = string("x_269_cast_fp16")]; tensor var_7385 = const()[name = string("op_7385"), val = tensor([1, -1, 1024, 128])]; tensor key_states_55_cast_fp16 = reshape(shape = var_7385, x = x_269_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor x_273_axes_0 = const()[name = string("x_273_axes_0"), val = tensor([1])]; tensor x_273_cast_fp16 = expand_dims(axes = x_273_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_273_cast_fp16")]; tensor var_7393 = const()[name = string("op_7393"), val = tensor([1, 4, 1, 1])]; tensor x_275_cast_fp16 = tile(reps = var_7393, x = x_273_cast_fp16)[name = string("x_275_cast_fp16")]; tensor var_7405 = const()[name = string("op_7405"), val = tensor([1, -1, 1024, 128])]; tensor value_states_81_cast_fp16 = reshape(shape = var_7405, x = x_275_cast_fp16)[name = string("value_states_81_cast_fp16")]; bool var_7420_transpose_x_1 = const()[name = string("op_7420_transpose_x_1"), val = bool(false)]; bool var_7420_transpose_y_1 = const()[name = string("op_7420_transpose_y_1"), val = bool(true)]; tensor var_7420 = matmul(transpose_x = var_7420_transpose_x_1, transpose_y = var_7420_transpose_y_1, x = query_states_53, y = key_states_55_cast_fp16)[name = string("op_7420")]; fp16 var_7421_to_fp16 = const()[name = string("op_7421_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_79_cast_fp16 = mul(x = var_7420, y = var_7421_to_fp16)[name = string("attn_weights_79_cast_fp16")]; tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = causal_mask)[name = string("attn_weights_81_cast_fp16")]; int32 var_7456 = const()[name = string("op_7456"), val = int32(-1)]; tensor attn_weights_83_cast_fp16 = softmax(axis = var_7456, x = attn_weights_81_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; bool attn_output_131_transpose_x_0 = const()[name = string("attn_output_131_transpose_x_0"), val = bool(false)]; bool attn_output_131_transpose_y_0 = const()[name = string("attn_output_131_transpose_y_0"), val = bool(false)]; tensor attn_output_131_cast_fp16 = matmul(transpose_x = attn_output_131_transpose_x_0, transpose_y = attn_output_131_transpose_y_0, x = attn_weights_83_cast_fp16, y = value_states_81_cast_fp16)[name = string("attn_output_131_cast_fp16")]; tensor var_7467_perm_0 = const()[name = string("op_7467_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7471 = const()[name = string("op_7471"), val = tensor([1, 1, 4096])]; tensor var_7467_cast_fp16 = transpose(perm = var_7467_perm_0, x = attn_output_131_cast_fp16)[name = string("transpose_28")]; tensor attn_output_135_cast_fp16 = reshape(shape = var_7471, x = var_7467_cast_fp16)[name = string("attn_output_135_cast_fp16")]; tensor var_7476 = const()[name = string("op_7476"), val = tensor([0, 2, 1])]; string var_7492_pad_type_0 = const()[name = string("op_7492_pad_type_0"), val = string("valid")]; int32 var_7492_groups_0 = const()[name = string("op_7492_groups_0"), val = int32(1)]; tensor var_7492_strides_0 = const()[name = string("op_7492_strides_0"), val = tensor([1])]; tensor var_7492_pad_0 = const()[name = string("op_7492_pad_0"), val = tensor([0, 0])]; tensor var_7492_dilations_0 = const()[name = string("op_7492_dilations_0"), val = tensor([1])]; tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917994240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923237184))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7477_cast_fp16 = transpose(perm = var_7476, x = attn_output_135_cast_fp16)[name = string("transpose_27")]; tensor var_7492_cast_fp16 = conv(dilations = var_7492_dilations_0, groups = var_7492_groups_0, pad = var_7492_pad_0, pad_type = var_7492_pad_type_0, strides = var_7492_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_7477_cast_fp16)[name = string("op_7492_cast_fp16")]; tensor var_7496 = const()[name = string("op_7496"), val = tensor([0, 2, 1])]; tensor attn_output_139_cast_fp16 = transpose(perm = var_7496, x = var_7492_cast_fp16)[name = string("transpose_26")]; tensor hidden_states_83_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor mean_111_axes_0 = const()[name = string("mean_111_axes_0"), val = tensor([-1])]; bool mean_111_keep_dims_0 = const()[name = string("mean_111_keep_dims_0"), val = bool(true)]; tensor mean_111_cast_fp16 = reduce_mean(axes = mean_111_axes_0, keep_dims = mean_111_keep_dims_0, x = hidden_states_83_cast_fp16)[name = string("mean_111_cast_fp16")]; tensor input_245_cast_fp16 = sub(x = hidden_states_83_cast_fp16, y = mean_111_cast_fp16)[name = string("input_245_cast_fp16")]; tensor var_7515_axes_0 = const()[name = string("op_7515_axes_0"), val = tensor([-1])]; tensor model_model_layers_31_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_31_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923247488)))]; fp16 var_7503_to_fp16 = const()[name = string("op_7503_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7515_cast_fp16 = layer_norm(axes = var_7515_axes_0, epsilon = var_7503_to_fp16, gamma = model_model_layers_31_post_attention_layernorm_weight_to_fp16, x = input_245_cast_fp16)[name = string("op_7515_cast_fp16")]; tensor var_7529 = const()[name = string("op_7529"), val = tensor([0, 2, 1])]; tensor input_247_axes_0 = const()[name = string("input_247_axes_0"), val = tensor([2])]; tensor var_7530 = transpose(perm = var_7529, x = var_7515_cast_fp16)[name = string("transpose_25")]; tensor input_247 = expand_dims(axes = input_247_axes_0, x = var_7530)[name = string("input_247")]; string input_249_pad_type_0 = const()[name = string("input_249_pad_type_0"), val = string("valid")]; tensor input_249_strides_0 = const()[name = string("input_249_strides_0"), val = tensor([1, 1])]; tensor input_249_pad_0 = const()[name = string("input_249_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_249_dilations_0 = const()[name = string("input_249_dilations_0"), val = tensor([1, 1])]; int32 input_249_groups_0 = const()[name = string("input_249_groups_0"), val = int32(1)]; tensor input_249 = conv(dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = model_model_layers_31_mlp_gate_proj_weight_palettized, x = input_247)[name = string("input_249")]; string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_31_mlp_up_proj_weight_palettized, x = input_247)[name = string("b_27")]; tensor c_27 = silu(x = input_249)[name = string("c_27")]; tensor input_251 = mul(x = c_27, y = b_27)[name = string("input_251")]; string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; tensor e_27 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_31_mlp_down_proj_weight_palettized, x = input_251)[name = string("e_27")]; tensor var_7552_axes_0 = const()[name = string("op_7552_axes_0"), val = tensor([2])]; tensor var_7552 = squeeze(axes = var_7552_axes_0, x = e_27)[name = string("op_7552")]; tensor var_7553 = const()[name = string("op_7553"), val = tensor([0, 2, 1])]; tensor var_7554 = transpose(perm = var_7553, x = var_7552)[name = string("transpose_24")]; tensor hidden_states_85_cast_fp16 = add(x = hidden_states_83_cast_fp16, y = var_7554)[name = string("hidden_states_85_cast_fp16")]; tensor mean_113_axes_0 = const()[name = string("mean_113_axes_0"), val = tensor([-1])]; bool mean_113_keep_dims_0 = const()[name = string("mean_113_keep_dims_0"), val = bool(true)]; tensor mean_113_cast_fp16 = reduce_mean(axes = mean_113_axes_0, keep_dims = mean_113_keep_dims_0, x = hidden_states_85_cast_fp16)[name = string("mean_113_cast_fp16")]; tensor input_253_cast_fp16 = sub(x = hidden_states_85_cast_fp16, y = mean_113_cast_fp16)[name = string("input_253_cast_fp16")]; tensor var_7572_axes_0 = const()[name = string("op_7572_axes_0"), val = tensor([-1])]; tensor model_model_layers_32_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_32_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923252672)))]; fp16 var_7560_to_fp16 = const()[name = string("op_7560_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7572_cast_fp16 = layer_norm(axes = var_7572_axes_0, epsilon = var_7560_to_fp16, gamma = model_model_layers_32_input_layernorm_weight_to_fp16, x = input_253_cast_fp16)[name = string("op_7572_cast_fp16")]; tensor var_7578 = const()[name = string("op_7578"), val = tensor([0, 2, 1])]; tensor var_7581_axes_0 = const()[name = string("op_7581_axes_0"), val = tensor([2])]; tensor var_7579 = transpose(perm = var_7578, x = var_7572_cast_fp16)[name = string("transpose_23")]; tensor var_7581 = expand_dims(axes = var_7581_axes_0, x = var_7579)[name = string("op_7581")]; string var_7597_pad_type_0 = const()[name = string("op_7597_pad_type_0"), val = string("valid")]; tensor var_7597_strides_0 = const()[name = string("op_7597_strides_0"), val = tensor([1, 1])]; tensor var_7597_pad_0 = const()[name = string("op_7597_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7597_dilations_0 = const()[name = string("op_7597_dilations_0"), val = tensor([1, 1])]; int32 var_7597_groups_0 = const()[name = string("op_7597_groups_0"), val = int32(1)]; tensor var_7597 = conv(dilations = var_7597_dilations_0, groups = var_7597_groups_0, pad = var_7597_pad_0, pad_type = var_7597_pad_type_0, strides = var_7597_strides_0, weight = model_model_layers_32_self_attn_q_proj_weight_palettized, x = var_7581)[name = string("op_7597")]; tensor var_7602 = const()[name = string("op_7602"), val = tensor([1, 32, 1, 128])]; tensor var_7603 = reshape(shape = var_7602, x = var_7597)[name = string("op_7603")]; string var_7619_pad_type_0 = const()[name = string("op_7619_pad_type_0"), val = string("valid")]; tensor var_7619_strides_0 = const()[name = string("op_7619_strides_0"), val = tensor([1, 1])]; tensor var_7619_pad_0 = const()[name = string("op_7619_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7619_dilations_0 = const()[name = string("op_7619_dilations_0"), val = tensor([1, 1])]; int32 var_7619_groups_0 = const()[name = string("op_7619_groups_0"), val = int32(1)]; tensor var_7619 = conv(dilations = var_7619_dilations_0, groups = var_7619_groups_0, pad = var_7619_pad_0, pad_type = var_7619_pad_type_0, strides = var_7619_strides_0, weight = model_model_layers_32_self_attn_k_proj_weight_palettized, x = var_7581)[name = string("op_7619")]; tensor var_7624 = const()[name = string("op_7624"), val = tensor([1, 8, 1, 128])]; tensor var_7625 = reshape(shape = var_7624, x = var_7619)[name = string("op_7625")]; string var_7641_pad_type_0 = const()[name = string("op_7641_pad_type_0"), val = string("valid")]; tensor var_7641_strides_0 = const()[name = string("op_7641_strides_0"), val = tensor([1, 1])]; tensor var_7641_pad_0 = const()[name = string("op_7641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7641_dilations_0 = const()[name = string("op_7641_dilations_0"), val = tensor([1, 1])]; int32 var_7641_groups_0 = const()[name = string("op_7641_groups_0"), val = int32(1)]; tensor var_7641 = conv(dilations = var_7641_dilations_0, groups = var_7641_groups_0, pad = var_7641_pad_0, pad_type = var_7641_pad_type_0, strides = var_7641_strides_0, weight = model_model_layers_32_self_attn_v_proj_weight_palettized, x = var_7581)[name = string("op_7641")]; tensor var_7646 = const()[name = string("op_7646"), val = tensor([1, 8, 1, 128])]; tensor var_7647 = reshape(shape = var_7646, x = var_7641)[name = string("op_7647")]; tensor mean_115_axes_0 = const()[name = string("mean_115_axes_0"), val = tensor([-1])]; bool mean_115_keep_dims_0 = const()[name = string("mean_115_keep_dims_0"), val = bool(true)]; tensor mean_115 = reduce_mean(axes = mean_115_axes_0, keep_dims = mean_115_keep_dims_0, x = var_7603)[name = string("mean_115")]; tensor input_257 = sub(x = var_7603, y = mean_115)[name = string("input_257")]; tensor var_7668_axes_0 = const()[name = string("op_7668_axes_0"), val = tensor([-1])]; tensor model_model_layers_32_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_32_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923257856)))]; fp16 var_7656_to_fp16 = const()[name = string("op_7656_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7668_cast_fp16 = layer_norm(axes = var_7668_axes_0, epsilon = var_7656_to_fp16, gamma = model_model_layers_32_self_attn_q_norm_weight_to_fp16, x = input_257)[name = string("op_7668_cast_fp16")]; tensor mean_117_axes_0 = const()[name = string("mean_117_axes_0"), val = tensor([-1])]; bool mean_117_keep_dims_0 = const()[name = string("mean_117_keep_dims_0"), val = bool(true)]; tensor mean_117 = reduce_mean(axes = mean_117_axes_0, keep_dims = mean_117_keep_dims_0, x = var_7625)[name = string("mean_117")]; tensor input_259 = sub(x = var_7625, y = mean_117)[name = string("input_259")]; tensor var_7686_axes_0 = const()[name = string("op_7686_axes_0"), val = tensor([-1])]; tensor model_model_layers_32_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_32_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258176)))]; fp16 var_7674_to_fp16 = const()[name = string("op_7674_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7686_cast_fp16 = layer_norm(axes = var_7686_axes_0, epsilon = var_7674_to_fp16, gamma = model_model_layers_32_self_attn_k_norm_weight_to_fp16, x = input_259)[name = string("op_7686_cast_fp16")]; tensor var_7689 = mul(x = var_7668_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7689")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_7668_cast_fp16)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_7668_cast_fp16)[name = string("x2_57")]; fp16 const_257_promoted = const()[name = string("const_257_promoted"), val = fp16(-0x1p+0)]; tensor var_7710 = mul(x = x2_57, y = const_257_promoted)[name = string("op_7710")]; int32 var_7712 = const()[name = string("op_7712"), val = int32(-1)]; bool var_7713_interleave_0 = const()[name = string("op_7713_interleave_0"), val = bool(false)]; tensor var_7713 = concat(axis = var_7712, interleave = var_7713_interleave_0, values = (var_7710, x1_57))[name = string("op_7713")]; tensor var_7714 = mul(x = var_7713, y = sin_1_cast_fp16)[name = string("op_7714")]; tensor query_states_57 = add(x = var_7689, y = var_7714)[name = string("query_states_57")]; tensor var_7717 = mul(x = var_7686_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7717")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_7686_cast_fp16)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_7686_cast_fp16)[name = string("x2_59")]; fp16 const_260_promoted = const()[name = string("const_260_promoted"), val = fp16(-0x1p+0)]; tensor var_7738 = mul(x = x2_59, y = const_260_promoted)[name = string("op_7738")]; int32 var_7740 = const()[name = string("op_7740"), val = int32(-1)]; bool var_7741_interleave_0 = const()[name = string("op_7741_interleave_0"), val = bool(false)]; tensor var_7741 = concat(axis = var_7740, interleave = var_7741_interleave_0, values = (var_7738, x1_59))[name = string("op_7741")]; tensor var_7742 = mul(x = var_7741, y = sin_1_cast_fp16)[name = string("op_7742")]; tensor key_states_57 = add(x = var_7717, y = var_7742)[name = string("key_states_57")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([32])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([33])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_172, concat_115_values1_0, var_1195, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = key_states_57, x = coreml_update_state_63)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([68])]; tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([69])]; int32 concat_118_axis_0 = const()[name = string("concat_118_axis_0"), val = int32(0)]; bool concat_118_interleave_0 = const()[name = string("concat_118_interleave_0"), val = bool(false)]; tensor concat_118 = concat(axis = concat_118_axis_0, interleave = concat_118_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_118")]; tensor concat_119_values1_0 = const()[name = string("concat_119_values1_0"), val = tensor([0])]; tensor concat_119_values3_0 = const()[name = string("concat_119_values3_0"), val = tensor([0])]; int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (expand_dims_178, concat_119_values1_0, var_1195, concat_119_values3_0))[name = string("concat_119")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = var_7647, x = coreml_update_state_64)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; tensor var_7797_begin_0 = const()[name = string("op_7797_begin_0"), val = tensor([32, 0, 0, 0])]; tensor var_7797_end_0 = const()[name = string("op_7797_end_0"), val = tensor([33, 8, 1024, 128])]; tensor var_7797_end_mask_0 = const()[name = string("op_7797_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7797_cast_fp16 = slice_by_index(begin = var_7797_begin_0, end = var_7797_end_0, end_mask = var_7797_end_mask_0, x = coreml_update_state_65)[name = string("op_7797_cast_fp16")]; tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_7797_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; tensor var_7804_begin_0 = const()[name = string("op_7804_begin_0"), val = tensor([68, 0, 0, 0])]; tensor var_7804_end_0 = const()[name = string("op_7804_end_0"), val = tensor([69, 8, 1024, 128])]; tensor var_7804_end_mask_0 = const()[name = string("op_7804_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7804_cast_fp16 = slice_by_index(begin = var_7804_begin_0, end = var_7804_end_0, end_mask = var_7804_end_mask_0, x = coreml_update_state_65)[name = string("op_7804_cast_fp16")]; tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_7804_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; tensor x_287_axes_0 = const()[name = string("x_287_axes_0"), val = tensor([1])]; tensor x_287_cast_fp16 = expand_dims(axes = x_287_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_287_cast_fp16")]; tensor var_7841 = const()[name = string("op_7841"), val = tensor([1, 4, 1, 1])]; tensor x_289_cast_fp16 = tile(reps = var_7841, x = x_287_cast_fp16)[name = string("x_289_cast_fp16")]; tensor var_7853 = const()[name = string("op_7853"), val = tensor([1, -1, 1024, 128])]; tensor key_states_59_cast_fp16 = reshape(shape = var_7853, x = x_289_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor x_293_axes_0 = const()[name = string("x_293_axes_0"), val = tensor([1])]; tensor x_293_cast_fp16 = expand_dims(axes = x_293_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_293_cast_fp16")]; tensor var_7861 = const()[name = string("op_7861"), val = tensor([1, 4, 1, 1])]; tensor x_295_cast_fp16 = tile(reps = var_7861, x = x_293_cast_fp16)[name = string("x_295_cast_fp16")]; tensor var_7873 = const()[name = string("op_7873"), val = tensor([1, -1, 1024, 128])]; tensor value_states_87_cast_fp16 = reshape(shape = var_7873, x = x_295_cast_fp16)[name = string("value_states_87_cast_fp16")]; bool var_7888_transpose_x_1 = const()[name = string("op_7888_transpose_x_1"), val = bool(false)]; bool var_7888_transpose_y_1 = const()[name = string("op_7888_transpose_y_1"), val = bool(true)]; tensor var_7888 = matmul(transpose_x = var_7888_transpose_x_1, transpose_y = var_7888_transpose_y_1, x = query_states_57, y = key_states_59_cast_fp16)[name = string("op_7888")]; fp16 var_7889_to_fp16 = const()[name = string("op_7889_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_85_cast_fp16 = mul(x = var_7888, y = var_7889_to_fp16)[name = string("attn_weights_85_cast_fp16")]; tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; int32 var_7924 = const()[name = string("op_7924"), val = int32(-1)]; tensor attn_weights_89_cast_fp16 = softmax(axis = var_7924, x = attn_weights_87_cast_fp16)[name = string("attn_weights_89_cast_fp16")]; bool attn_output_141_transpose_x_0 = const()[name = string("attn_output_141_transpose_x_0"), val = bool(false)]; bool attn_output_141_transpose_y_0 = const()[name = string("attn_output_141_transpose_y_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = matmul(transpose_x = attn_output_141_transpose_x_0, transpose_y = attn_output_141_transpose_y_0, x = attn_weights_89_cast_fp16, y = value_states_87_cast_fp16)[name = string("attn_output_141_cast_fp16")]; tensor var_7935_perm_0 = const()[name = string("op_7935_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7939 = const()[name = string("op_7939"), val = tensor([1, 1, 4096])]; tensor var_7935_cast_fp16 = transpose(perm = var_7935_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_22")]; tensor attn_output_145_cast_fp16 = reshape(shape = var_7939, x = var_7935_cast_fp16)[name = string("attn_output_145_cast_fp16")]; tensor var_7944 = const()[name = string("op_7944"), val = tensor([0, 2, 1])]; string var_7960_pad_type_0 = const()[name = string("op_7960_pad_type_0"), val = string("valid")]; int32 var_7960_groups_0 = const()[name = string("op_7960_groups_0"), val = int32(1)]; tensor var_7960_strides_0 = const()[name = string("op_7960_strides_0"), val = tensor([1])]; tensor var_7960_pad_0 = const()[name = string("op_7960_pad_0"), val = tensor([0, 0])]; tensor var_7960_dilations_0 = const()[name = string("op_7960_dilations_0"), val = tensor([1])]; tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928501440))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7945_cast_fp16 = transpose(perm = var_7944, x = attn_output_145_cast_fp16)[name = string("transpose_21")]; tensor var_7960_cast_fp16 = conv(dilations = var_7960_dilations_0, groups = var_7960_groups_0, pad = var_7960_pad_0, pad_type = var_7960_pad_type_0, strides = var_7960_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_7945_cast_fp16)[name = string("op_7960_cast_fp16")]; tensor var_7964 = const()[name = string("op_7964"), val = tensor([0, 2, 1])]; tensor attn_output_149_cast_fp16 = transpose(perm = var_7964, x = var_7960_cast_fp16)[name = string("transpose_20")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor mean_119_axes_0 = const()[name = string("mean_119_axes_0"), val = tensor([-1])]; bool mean_119_keep_dims_0 = const()[name = string("mean_119_keep_dims_0"), val = bool(true)]; tensor mean_119_cast_fp16 = reduce_mean(axes = mean_119_axes_0, keep_dims = mean_119_keep_dims_0, x = hidden_states_89_cast_fp16)[name = string("mean_119_cast_fp16")]; tensor input_263_cast_fp16 = sub(x = hidden_states_89_cast_fp16, y = mean_119_cast_fp16)[name = string("input_263_cast_fp16")]; tensor var_7983_axes_0 = const()[name = string("op_7983_axes_0"), val = tensor([-1])]; tensor model_model_layers_32_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_32_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928511744)))]; fp16 var_7971_to_fp16 = const()[name = string("op_7971_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7983_cast_fp16 = layer_norm(axes = var_7983_axes_0, epsilon = var_7971_to_fp16, gamma = model_model_layers_32_post_attention_layernorm_weight_to_fp16, x = input_263_cast_fp16)[name = string("op_7983_cast_fp16")]; tensor var_7997 = const()[name = string("op_7997"), val = tensor([0, 2, 1])]; tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; tensor var_7998 = transpose(perm = var_7997, x = var_7983_cast_fp16)[name = string("transpose_19")]; tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_7998)[name = string("input_265")]; string input_267_pad_type_0 = const()[name = string("input_267_pad_type_0"), val = string("valid")]; tensor input_267_strides_0 = const()[name = string("input_267_strides_0"), val = tensor([1, 1])]; tensor input_267_pad_0 = const()[name = string("input_267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_267_dilations_0 = const()[name = string("input_267_dilations_0"), val = tensor([1, 1])]; int32 input_267_groups_0 = const()[name = string("input_267_groups_0"), val = int32(1)]; tensor input_267 = conv(dilations = input_267_dilations_0, groups = input_267_groups_0, pad = input_267_pad_0, pad_type = input_267_pad_type_0, strides = input_267_strides_0, weight = model_model_layers_32_mlp_gate_proj_weight_palettized, x = input_265)[name = string("input_267")]; string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_32_mlp_up_proj_weight_palettized, x = input_265)[name = string("b_29")]; tensor c_29 = silu(x = input_267)[name = string("c_29")]; tensor input_269 = mul(x = c_29, y = b_29)[name = string("input_269")]; string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; tensor e_29 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_32_mlp_down_proj_weight_palettized, x = input_269)[name = string("e_29")]; tensor var_8020_axes_0 = const()[name = string("op_8020_axes_0"), val = tensor([2])]; tensor var_8020 = squeeze(axes = var_8020_axes_0, x = e_29)[name = string("op_8020")]; tensor var_8021 = const()[name = string("op_8021"), val = tensor([0, 2, 1])]; tensor var_8022 = transpose(perm = var_8021, x = var_8020)[name = string("transpose_18")]; tensor hidden_states_91_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = var_8022)[name = string("hidden_states_91_cast_fp16")]; tensor mean_121_axes_0 = const()[name = string("mean_121_axes_0"), val = tensor([-1])]; bool mean_121_keep_dims_0 = const()[name = string("mean_121_keep_dims_0"), val = bool(true)]; tensor mean_121_cast_fp16 = reduce_mean(axes = mean_121_axes_0, keep_dims = mean_121_keep_dims_0, x = hidden_states_91_cast_fp16)[name = string("mean_121_cast_fp16")]; tensor input_271_cast_fp16 = sub(x = hidden_states_91_cast_fp16, y = mean_121_cast_fp16)[name = string("input_271_cast_fp16")]; tensor var_8040_axes_0 = const()[name = string("op_8040_axes_0"), val = tensor([-1])]; tensor model_model_layers_33_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_33_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928516928)))]; fp16 var_8028_to_fp16 = const()[name = string("op_8028_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8040_cast_fp16 = layer_norm(axes = var_8040_axes_0, epsilon = var_8028_to_fp16, gamma = model_model_layers_33_input_layernorm_weight_to_fp16, x = input_271_cast_fp16)[name = string("op_8040_cast_fp16")]; tensor var_8046 = const()[name = string("op_8046"), val = tensor([0, 2, 1])]; tensor var_8049_axes_0 = const()[name = string("op_8049_axes_0"), val = tensor([2])]; tensor var_8047 = transpose(perm = var_8046, x = var_8040_cast_fp16)[name = string("transpose_17")]; tensor var_8049 = expand_dims(axes = var_8049_axes_0, x = var_8047)[name = string("op_8049")]; string var_8065_pad_type_0 = const()[name = string("op_8065_pad_type_0"), val = string("valid")]; tensor var_8065_strides_0 = const()[name = string("op_8065_strides_0"), val = tensor([1, 1])]; tensor var_8065_pad_0 = const()[name = string("op_8065_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8065_dilations_0 = const()[name = string("op_8065_dilations_0"), val = tensor([1, 1])]; int32 var_8065_groups_0 = const()[name = string("op_8065_groups_0"), val = int32(1)]; tensor var_8065 = conv(dilations = var_8065_dilations_0, groups = var_8065_groups_0, pad = var_8065_pad_0, pad_type = var_8065_pad_type_0, strides = var_8065_strides_0, weight = model_model_layers_33_self_attn_q_proj_weight_palettized, x = var_8049)[name = string("op_8065")]; tensor var_8070 = const()[name = string("op_8070"), val = tensor([1, 32, 1, 128])]; tensor var_8071 = reshape(shape = var_8070, x = var_8065)[name = string("op_8071")]; string var_8087_pad_type_0 = const()[name = string("op_8087_pad_type_0"), val = string("valid")]; tensor var_8087_strides_0 = const()[name = string("op_8087_strides_0"), val = tensor([1, 1])]; tensor var_8087_pad_0 = const()[name = string("op_8087_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8087_dilations_0 = const()[name = string("op_8087_dilations_0"), val = tensor([1, 1])]; int32 var_8087_groups_0 = const()[name = string("op_8087_groups_0"), val = int32(1)]; tensor var_8087 = conv(dilations = var_8087_dilations_0, groups = var_8087_groups_0, pad = var_8087_pad_0, pad_type = var_8087_pad_type_0, strides = var_8087_strides_0, weight = model_model_layers_33_self_attn_k_proj_weight_palettized, x = var_8049)[name = string("op_8087")]; tensor var_8092 = const()[name = string("op_8092"), val = tensor([1, 8, 1, 128])]; tensor var_8093 = reshape(shape = var_8092, x = var_8087)[name = string("op_8093")]; string var_8109_pad_type_0 = const()[name = string("op_8109_pad_type_0"), val = string("valid")]; tensor var_8109_strides_0 = const()[name = string("op_8109_strides_0"), val = tensor([1, 1])]; tensor var_8109_pad_0 = const()[name = string("op_8109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8109_dilations_0 = const()[name = string("op_8109_dilations_0"), val = tensor([1, 1])]; int32 var_8109_groups_0 = const()[name = string("op_8109_groups_0"), val = int32(1)]; tensor var_8109 = conv(dilations = var_8109_dilations_0, groups = var_8109_groups_0, pad = var_8109_pad_0, pad_type = var_8109_pad_type_0, strides = var_8109_strides_0, weight = model_model_layers_33_self_attn_v_proj_weight_palettized, x = var_8049)[name = string("op_8109")]; tensor var_8114 = const()[name = string("op_8114"), val = tensor([1, 8, 1, 128])]; tensor var_8115 = reshape(shape = var_8114, x = var_8109)[name = string("op_8115")]; tensor mean_123_axes_0 = const()[name = string("mean_123_axes_0"), val = tensor([-1])]; bool mean_123_keep_dims_0 = const()[name = string("mean_123_keep_dims_0"), val = bool(true)]; tensor mean_123 = reduce_mean(axes = mean_123_axes_0, keep_dims = mean_123_keep_dims_0, x = var_8071)[name = string("mean_123")]; tensor input_275 = sub(x = var_8071, y = mean_123)[name = string("input_275")]; tensor var_8136_axes_0 = const()[name = string("op_8136_axes_0"), val = tensor([-1])]; tensor model_model_layers_33_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_33_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522112)))]; fp16 var_8124_to_fp16 = const()[name = string("op_8124_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8136_cast_fp16 = layer_norm(axes = var_8136_axes_0, epsilon = var_8124_to_fp16, gamma = model_model_layers_33_self_attn_q_norm_weight_to_fp16, x = input_275)[name = string("op_8136_cast_fp16")]; tensor mean_125_axes_0 = const()[name = string("mean_125_axes_0"), val = tensor([-1])]; bool mean_125_keep_dims_0 = const()[name = string("mean_125_keep_dims_0"), val = bool(true)]; tensor mean_125 = reduce_mean(axes = mean_125_axes_0, keep_dims = mean_125_keep_dims_0, x = var_8093)[name = string("mean_125")]; tensor input_277 = sub(x = var_8093, y = mean_125)[name = string("input_277")]; tensor var_8154_axes_0 = const()[name = string("op_8154_axes_0"), val = tensor([-1])]; tensor model_model_layers_33_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_33_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522432)))]; fp16 var_8142_to_fp16 = const()[name = string("op_8142_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8154_cast_fp16 = layer_norm(axes = var_8154_axes_0, epsilon = var_8142_to_fp16, gamma = model_model_layers_33_self_attn_k_norm_weight_to_fp16, x = input_277)[name = string("op_8154_cast_fp16")]; tensor var_8157 = mul(x = var_8136_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8157")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_8136_cast_fp16)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_8136_cast_fp16)[name = string("x2_61")]; fp16 const_275_promoted = const()[name = string("const_275_promoted"), val = fp16(-0x1p+0)]; tensor var_8178 = mul(x = x2_61, y = const_275_promoted)[name = string("op_8178")]; int32 var_8180 = const()[name = string("op_8180"), val = int32(-1)]; bool var_8181_interleave_0 = const()[name = string("op_8181_interleave_0"), val = bool(false)]; tensor var_8181 = concat(axis = var_8180, interleave = var_8181_interleave_0, values = (var_8178, x1_61))[name = string("op_8181")]; tensor var_8182 = mul(x = var_8181, y = sin_1_cast_fp16)[name = string("op_8182")]; tensor query_states_61 = add(x = var_8157, y = var_8182)[name = string("query_states_61")]; tensor var_8185 = mul(x = var_8154_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8185")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = var_8154_cast_fp16)[name = string("x1_63")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = var_8154_cast_fp16)[name = string("x2_63")]; fp16 const_278_promoted = const()[name = string("const_278_promoted"), val = fp16(-0x1p+0)]; tensor var_8206 = mul(x = x2_63, y = const_278_promoted)[name = string("op_8206")]; int32 var_8208 = const()[name = string("op_8208"), val = int32(-1)]; bool var_8209_interleave_0 = const()[name = string("op_8209_interleave_0"), val = bool(false)]; tensor var_8209 = concat(axis = var_8208, interleave = var_8209_interleave_0, values = (var_8206, x1_63))[name = string("op_8209")]; tensor var_8210 = mul(x = var_8209, y = sin_1_cast_fp16)[name = string("op_8210")]; tensor key_states_61 = add(x = var_8185, y = var_8210)[name = string("key_states_61")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([33])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([34])]; int32 concat_122_axis_0 = const()[name = string("concat_122_axis_0"), val = int32(0)]; bool concat_122_interleave_0 = const()[name = string("concat_122_interleave_0"), val = bool(false)]; tensor concat_122 = concat(axis = concat_122_axis_0, interleave = concat_122_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_122")]; tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_184, concat_123_values1_0, var_1195, concat_123_values3_0))[name = string("concat_123")]; tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_122, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_123, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = key_states_61, x = coreml_update_state_65)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([69])]; tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([70])]; int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_126")]; tensor concat_127_values1_0 = const()[name = string("concat_127_values1_0"), val = tensor([0])]; tensor concat_127_values3_0 = const()[name = string("concat_127_values3_0"), val = tensor([0])]; int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (expand_dims_190, concat_127_values1_0, var_1195, concat_127_values3_0))[name = string("concat_127")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_126, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_127, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = var_8115, x = coreml_update_state_66)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; tensor var_8265_begin_0 = const()[name = string("op_8265_begin_0"), val = tensor([33, 0, 0, 0])]; tensor var_8265_end_0 = const()[name = string("op_8265_end_0"), val = tensor([34, 8, 1024, 128])]; tensor var_8265_end_mask_0 = const()[name = string("op_8265_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8265_cast_fp16 = slice_by_index(begin = var_8265_begin_0, end = var_8265_end_0, end_mask = var_8265_end_mask_0, x = coreml_update_state_67)[name = string("op_8265_cast_fp16")]; tensor K_layer_cache_31_axes_0 = const()[name = string("K_layer_cache_31_axes_0"), val = tensor([0])]; tensor K_layer_cache_31_cast_fp16 = squeeze(axes = K_layer_cache_31_axes_0, x = var_8265_cast_fp16)[name = string("K_layer_cache_31_cast_fp16")]; tensor var_8272_begin_0 = const()[name = string("op_8272_begin_0"), val = tensor([69, 0, 0, 0])]; tensor var_8272_end_0 = const()[name = string("op_8272_end_0"), val = tensor([70, 8, 1024, 128])]; tensor var_8272_end_mask_0 = const()[name = string("op_8272_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8272_cast_fp16 = slice_by_index(begin = var_8272_begin_0, end = var_8272_end_0, end_mask = var_8272_end_mask_0, x = coreml_update_state_67)[name = string("op_8272_cast_fp16")]; tensor V_layer_cache_31_axes_0 = const()[name = string("V_layer_cache_31_axes_0"), val = tensor([0])]; tensor V_layer_cache_31_cast_fp16 = squeeze(axes = V_layer_cache_31_axes_0, x = var_8272_cast_fp16)[name = string("V_layer_cache_31_cast_fp16")]; tensor x_307_axes_0 = const()[name = string("x_307_axes_0"), val = tensor([1])]; tensor x_307_cast_fp16 = expand_dims(axes = x_307_axes_0, x = K_layer_cache_31_cast_fp16)[name = string("x_307_cast_fp16")]; tensor var_8309 = const()[name = string("op_8309"), val = tensor([1, 4, 1, 1])]; tensor x_309_cast_fp16 = tile(reps = var_8309, x = x_307_cast_fp16)[name = string("x_309_cast_fp16")]; tensor var_8321 = const()[name = string("op_8321"), val = tensor([1, -1, 1024, 128])]; tensor key_states_63_cast_fp16 = reshape(shape = var_8321, x = x_309_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor x_313_axes_0 = const()[name = string("x_313_axes_0"), val = tensor([1])]; tensor x_313_cast_fp16 = expand_dims(axes = x_313_axes_0, x = V_layer_cache_31_cast_fp16)[name = string("x_313_cast_fp16")]; tensor var_8329 = const()[name = string("op_8329"), val = tensor([1, 4, 1, 1])]; tensor x_315_cast_fp16 = tile(reps = var_8329, x = x_313_cast_fp16)[name = string("x_315_cast_fp16")]; tensor var_8341 = const()[name = string("op_8341"), val = tensor([1, -1, 1024, 128])]; tensor value_states_93_cast_fp16 = reshape(shape = var_8341, x = x_315_cast_fp16)[name = string("value_states_93_cast_fp16")]; bool var_8356_transpose_x_1 = const()[name = string("op_8356_transpose_x_1"), val = bool(false)]; bool var_8356_transpose_y_1 = const()[name = string("op_8356_transpose_y_1"), val = bool(true)]; tensor var_8356 = matmul(transpose_x = var_8356_transpose_x_1, transpose_y = var_8356_transpose_y_1, x = query_states_61, y = key_states_63_cast_fp16)[name = string("op_8356")]; fp16 var_8357_to_fp16 = const()[name = string("op_8357_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_91_cast_fp16 = mul(x = var_8356, y = var_8357_to_fp16)[name = string("attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = causal_mask)[name = string("attn_weights_93_cast_fp16")]; int32 var_8392 = const()[name = string("op_8392"), val = int32(-1)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_8392, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_151_transpose_x_0 = const()[name = string("attn_output_151_transpose_x_0"), val = bool(false)]; bool attn_output_151_transpose_y_0 = const()[name = string("attn_output_151_transpose_y_0"), val = bool(false)]; tensor attn_output_151_cast_fp16 = matmul(transpose_x = attn_output_151_transpose_x_0, transpose_y = attn_output_151_transpose_y_0, x = attn_weights_95_cast_fp16, y = value_states_93_cast_fp16)[name = string("attn_output_151_cast_fp16")]; tensor var_8403_perm_0 = const()[name = string("op_8403_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8407 = const()[name = string("op_8407"), val = tensor([1, 1, 4096])]; tensor var_8403_cast_fp16 = transpose(perm = var_8403_perm_0, x = attn_output_151_cast_fp16)[name = string("transpose_16")]; tensor attn_output_155_cast_fp16 = reshape(shape = var_8407, x = var_8403_cast_fp16)[name = string("attn_output_155_cast_fp16")]; tensor var_8412 = const()[name = string("op_8412"), val = tensor([0, 2, 1])]; string var_8428_pad_type_0 = const()[name = string("op_8428_pad_type_0"), val = string("valid")]; int32 var_8428_groups_0 = const()[name = string("op_8428_groups_0"), val = int32(1)]; tensor var_8428_strides_0 = const()[name = string("op_8428_strides_0"), val = tensor([1])]; tensor var_8428_pad_0 = const()[name = string("op_8428_pad_0"), val = tensor([0, 0])]; tensor var_8428_dilations_0 = const()[name = string("op_8428_dilations_0"), val = tensor([1])]; tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933765696))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8413_cast_fp16 = transpose(perm = var_8412, x = attn_output_155_cast_fp16)[name = string("transpose_15")]; tensor var_8428_cast_fp16 = conv(dilations = var_8428_dilations_0, groups = var_8428_groups_0, pad = var_8428_pad_0, pad_type = var_8428_pad_type_0, strides = var_8428_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_8413_cast_fp16)[name = string("op_8428_cast_fp16")]; tensor var_8432 = const()[name = string("op_8432"), val = tensor([0, 2, 1])]; tensor attn_output_159_cast_fp16 = transpose(perm = var_8432, x = var_8428_cast_fp16)[name = string("transpose_14")]; tensor hidden_states_95_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor mean_127_axes_0 = const()[name = string("mean_127_axes_0"), val = tensor([-1])]; bool mean_127_keep_dims_0 = const()[name = string("mean_127_keep_dims_0"), val = bool(true)]; tensor mean_127_cast_fp16 = reduce_mean(axes = mean_127_axes_0, keep_dims = mean_127_keep_dims_0, x = hidden_states_95_cast_fp16)[name = string("mean_127_cast_fp16")]; tensor input_281_cast_fp16 = sub(x = hidden_states_95_cast_fp16, y = mean_127_cast_fp16)[name = string("input_281_cast_fp16")]; tensor var_8451_axes_0 = const()[name = string("op_8451_axes_0"), val = tensor([-1])]; tensor model_model_layers_33_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_33_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933776000)))]; fp16 var_8439_to_fp16 = const()[name = string("op_8439_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8451_cast_fp16 = layer_norm(axes = var_8451_axes_0, epsilon = var_8439_to_fp16, gamma = model_model_layers_33_post_attention_layernorm_weight_to_fp16, x = input_281_cast_fp16)[name = string("op_8451_cast_fp16")]; tensor var_8465 = const()[name = string("op_8465"), val = tensor([0, 2, 1])]; tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; tensor var_8466 = transpose(perm = var_8465, x = var_8451_cast_fp16)[name = string("transpose_13")]; tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_8466)[name = string("input_283")]; string input_285_pad_type_0 = const()[name = string("input_285_pad_type_0"), val = string("valid")]; tensor input_285_strides_0 = const()[name = string("input_285_strides_0"), val = tensor([1, 1])]; tensor input_285_pad_0 = const()[name = string("input_285_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_285_dilations_0 = const()[name = string("input_285_dilations_0"), val = tensor([1, 1])]; int32 input_285_groups_0 = const()[name = string("input_285_groups_0"), val = int32(1)]; tensor input_285 = conv(dilations = input_285_dilations_0, groups = input_285_groups_0, pad = input_285_pad_0, pad_type = input_285_pad_type_0, strides = input_285_strides_0, weight = model_model_layers_33_mlp_gate_proj_weight_palettized, x = input_283)[name = string("input_285")]; string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; tensor b_31 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_33_mlp_up_proj_weight_palettized, x = input_283)[name = string("b_31")]; tensor c_31 = silu(x = input_285)[name = string("c_31")]; tensor input_287 = mul(x = c_31, y = b_31)[name = string("input_287")]; string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; tensor e_31 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_33_mlp_down_proj_weight_palettized, x = input_287)[name = string("e_31")]; tensor var_8488_axes_0 = const()[name = string("op_8488_axes_0"), val = tensor([2])]; tensor var_8488 = squeeze(axes = var_8488_axes_0, x = e_31)[name = string("op_8488")]; tensor var_8489 = const()[name = string("op_8489"), val = tensor([0, 2, 1])]; tensor var_8490 = transpose(perm = var_8489, x = var_8488)[name = string("transpose_12")]; tensor hidden_states_97_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = var_8490)[name = string("hidden_states_97_cast_fp16")]; tensor mean_129_axes_0 = const()[name = string("mean_129_axes_0"), val = tensor([-1])]; bool mean_129_keep_dims_0 = const()[name = string("mean_129_keep_dims_0"), val = bool(true)]; tensor mean_129_cast_fp16 = reduce_mean(axes = mean_129_axes_0, keep_dims = mean_129_keep_dims_0, x = hidden_states_97_cast_fp16)[name = string("mean_129_cast_fp16")]; tensor input_289_cast_fp16 = sub(x = hidden_states_97_cast_fp16, y = mean_129_cast_fp16)[name = string("input_289_cast_fp16")]; tensor var_8508_axes_0 = const()[name = string("op_8508_axes_0"), val = tensor([-1])]; tensor model_model_layers_34_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_34_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933781184)))]; fp16 var_8496_to_fp16 = const()[name = string("op_8496_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8508_cast_fp16 = layer_norm(axes = var_8508_axes_0, epsilon = var_8496_to_fp16, gamma = model_model_layers_34_input_layernorm_weight_to_fp16, x = input_289_cast_fp16)[name = string("op_8508_cast_fp16")]; tensor var_8514 = const()[name = string("op_8514"), val = tensor([0, 2, 1])]; tensor var_8517_axes_0 = const()[name = string("op_8517_axes_0"), val = tensor([2])]; tensor var_8515 = transpose(perm = var_8514, x = var_8508_cast_fp16)[name = string("transpose_11")]; tensor var_8517 = expand_dims(axes = var_8517_axes_0, x = var_8515)[name = string("op_8517")]; string var_8533_pad_type_0 = const()[name = string("op_8533_pad_type_0"), val = string("valid")]; tensor var_8533_strides_0 = const()[name = string("op_8533_strides_0"), val = tensor([1, 1])]; tensor var_8533_pad_0 = const()[name = string("op_8533_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8533_dilations_0 = const()[name = string("op_8533_dilations_0"), val = tensor([1, 1])]; int32 var_8533_groups_0 = const()[name = string("op_8533_groups_0"), val = int32(1)]; tensor var_8533 = conv(dilations = var_8533_dilations_0, groups = var_8533_groups_0, pad = var_8533_pad_0, pad_type = var_8533_pad_type_0, strides = var_8533_strides_0, weight = model_model_layers_34_self_attn_q_proj_weight_palettized, x = var_8517)[name = string("op_8533")]; tensor var_8538 = const()[name = string("op_8538"), val = tensor([1, 32, 1, 128])]; tensor var_8539 = reshape(shape = var_8538, x = var_8533)[name = string("op_8539")]; string var_8555_pad_type_0 = const()[name = string("op_8555_pad_type_0"), val = string("valid")]; tensor var_8555_strides_0 = const()[name = string("op_8555_strides_0"), val = tensor([1, 1])]; tensor var_8555_pad_0 = const()[name = string("op_8555_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8555_dilations_0 = const()[name = string("op_8555_dilations_0"), val = tensor([1, 1])]; int32 var_8555_groups_0 = const()[name = string("op_8555_groups_0"), val = int32(1)]; tensor var_8555 = conv(dilations = var_8555_dilations_0, groups = var_8555_groups_0, pad = var_8555_pad_0, pad_type = var_8555_pad_type_0, strides = var_8555_strides_0, weight = model_model_layers_34_self_attn_k_proj_weight_palettized, x = var_8517)[name = string("op_8555")]; tensor var_8560 = const()[name = string("op_8560"), val = tensor([1, 8, 1, 128])]; tensor var_8561 = reshape(shape = var_8560, x = var_8555)[name = string("op_8561")]; string var_8577_pad_type_0 = const()[name = string("op_8577_pad_type_0"), val = string("valid")]; tensor var_8577_strides_0 = const()[name = string("op_8577_strides_0"), val = tensor([1, 1])]; tensor var_8577_pad_0 = const()[name = string("op_8577_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8577_dilations_0 = const()[name = string("op_8577_dilations_0"), val = tensor([1, 1])]; int32 var_8577_groups_0 = const()[name = string("op_8577_groups_0"), val = int32(1)]; tensor var_8577 = conv(dilations = var_8577_dilations_0, groups = var_8577_groups_0, pad = var_8577_pad_0, pad_type = var_8577_pad_type_0, strides = var_8577_strides_0, weight = model_model_layers_34_self_attn_v_proj_weight_palettized, x = var_8517)[name = string("op_8577")]; tensor var_8582 = const()[name = string("op_8582"), val = tensor([1, 8, 1, 128])]; tensor var_8583 = reshape(shape = var_8582, x = var_8577)[name = string("op_8583")]; tensor mean_131_axes_0 = const()[name = string("mean_131_axes_0"), val = tensor([-1])]; bool mean_131_keep_dims_0 = const()[name = string("mean_131_keep_dims_0"), val = bool(true)]; tensor mean_131 = reduce_mean(axes = mean_131_axes_0, keep_dims = mean_131_keep_dims_0, x = var_8539)[name = string("mean_131")]; tensor input_293 = sub(x = var_8539, y = mean_131)[name = string("input_293")]; tensor var_8604_axes_0 = const()[name = string("op_8604_axes_0"), val = tensor([-1])]; tensor model_model_layers_34_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_34_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933786368)))]; fp16 var_8592_to_fp16 = const()[name = string("op_8592_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8604_cast_fp16 = layer_norm(axes = var_8604_axes_0, epsilon = var_8592_to_fp16, gamma = model_model_layers_34_self_attn_q_norm_weight_to_fp16, x = input_293)[name = string("op_8604_cast_fp16")]; tensor mean_133_axes_0 = const()[name = string("mean_133_axes_0"), val = tensor([-1])]; bool mean_133_keep_dims_0 = const()[name = string("mean_133_keep_dims_0"), val = bool(true)]; tensor mean_133 = reduce_mean(axes = mean_133_axes_0, keep_dims = mean_133_keep_dims_0, x = var_8561)[name = string("mean_133")]; tensor input_295 = sub(x = var_8561, y = mean_133)[name = string("input_295")]; tensor var_8622_axes_0 = const()[name = string("op_8622_axes_0"), val = tensor([-1])]; tensor model_model_layers_34_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_34_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933786688)))]; fp16 var_8610_to_fp16 = const()[name = string("op_8610_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8622_cast_fp16 = layer_norm(axes = var_8622_axes_0, epsilon = var_8610_to_fp16, gamma = model_model_layers_34_self_attn_k_norm_weight_to_fp16, x = input_295)[name = string("op_8622_cast_fp16")]; tensor var_8625 = mul(x = var_8604_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8625")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = var_8604_cast_fp16)[name = string("x1_65")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = var_8604_cast_fp16)[name = string("x2_65")]; fp16 const_293_promoted = const()[name = string("const_293_promoted"), val = fp16(-0x1p+0)]; tensor var_8646 = mul(x = x2_65, y = const_293_promoted)[name = string("op_8646")]; int32 var_8648 = const()[name = string("op_8648"), val = int32(-1)]; bool var_8649_interleave_0 = const()[name = string("op_8649_interleave_0"), val = bool(false)]; tensor var_8649 = concat(axis = var_8648, interleave = var_8649_interleave_0, values = (var_8646, x1_65))[name = string("op_8649")]; tensor var_8650 = mul(x = var_8649, y = sin_1_cast_fp16)[name = string("op_8650")]; tensor query_states_65 = add(x = var_8625, y = var_8650)[name = string("query_states_65")]; tensor var_8653 = mul(x = var_8622_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8653")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = var_8622_cast_fp16)[name = string("x1_67")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = var_8622_cast_fp16)[name = string("x2_67")]; fp16 const_296_promoted = const()[name = string("const_296_promoted"), val = fp16(-0x1p+0)]; tensor var_8674 = mul(x = x2_67, y = const_296_promoted)[name = string("op_8674")]; int32 var_8676 = const()[name = string("op_8676"), val = int32(-1)]; bool var_8677_interleave_0 = const()[name = string("op_8677_interleave_0"), val = bool(false)]; tensor var_8677 = concat(axis = var_8676, interleave = var_8677_interleave_0, values = (var_8674, x1_67))[name = string("op_8677")]; tensor var_8678 = mul(x = var_8677, y = sin_1_cast_fp16)[name = string("op_8678")]; tensor key_states_65 = add(x = var_8653, y = var_8678)[name = string("key_states_65")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([34])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([35])]; int32 concat_130_axis_0 = const()[name = string("concat_130_axis_0"), val = int32(0)]; bool concat_130_interleave_0 = const()[name = string("concat_130_interleave_0"), val = bool(false)]; tensor concat_130 = concat(axis = concat_130_axis_0, interleave = concat_130_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_130")]; tensor concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = tensor([0])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_196, concat_131_values1_0, var_1195, concat_131_values3_0))[name = string("concat_131")]; tensor model_model_kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_130, begin_mask = model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_131, end_mask = model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_33_stride_0, update = key_states_65, x = coreml_update_state_67)[name = string("model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_32")]; tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([70])]; tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([71])]; int32 concat_134_axis_0 = const()[name = string("concat_134_axis_0"), val = int32(0)]; bool concat_134_interleave_0 = const()[name = string("concat_134_interleave_0"), val = bool(false)]; tensor concat_134 = concat(axis = concat_134_axis_0, interleave = concat_134_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_134")]; tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (expand_dims_202, concat_135_values1_0, var_1195, concat_135_values3_0))[name = string("concat_135")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_134, begin_mask = model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_135, end_mask = model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_34_stride_0, update = var_8583, x = coreml_update_state_68)[name = string("model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_33")]; tensor var_8733_begin_0 = const()[name = string("op_8733_begin_0"), val = tensor([34, 0, 0, 0])]; tensor var_8733_end_0 = const()[name = string("op_8733_end_0"), val = tensor([35, 8, 1024, 128])]; tensor var_8733_end_mask_0 = const()[name = string("op_8733_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8733_cast_fp16 = slice_by_index(begin = var_8733_begin_0, end = var_8733_end_0, end_mask = var_8733_end_mask_0, x = coreml_update_state_69)[name = string("op_8733_cast_fp16")]; tensor K_layer_cache_33_axes_0 = const()[name = string("K_layer_cache_33_axes_0"), val = tensor([0])]; tensor K_layer_cache_33_cast_fp16 = squeeze(axes = K_layer_cache_33_axes_0, x = var_8733_cast_fp16)[name = string("K_layer_cache_33_cast_fp16")]; tensor var_8740_begin_0 = const()[name = string("op_8740_begin_0"), val = tensor([70, 0, 0, 0])]; tensor var_8740_end_0 = const()[name = string("op_8740_end_0"), val = tensor([71, 8, 1024, 128])]; tensor var_8740_end_mask_0 = const()[name = string("op_8740_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8740_cast_fp16 = slice_by_index(begin = var_8740_begin_0, end = var_8740_end_0, end_mask = var_8740_end_mask_0, x = coreml_update_state_69)[name = string("op_8740_cast_fp16")]; tensor V_layer_cache_33_axes_0 = const()[name = string("V_layer_cache_33_axes_0"), val = tensor([0])]; tensor V_layer_cache_33_cast_fp16 = squeeze(axes = V_layer_cache_33_axes_0, x = var_8740_cast_fp16)[name = string("V_layer_cache_33_cast_fp16")]; tensor x_327_axes_0 = const()[name = string("x_327_axes_0"), val = tensor([1])]; tensor x_327_cast_fp16 = expand_dims(axes = x_327_axes_0, x = K_layer_cache_33_cast_fp16)[name = string("x_327_cast_fp16")]; tensor var_8777 = const()[name = string("op_8777"), val = tensor([1, 4, 1, 1])]; tensor x_329_cast_fp16 = tile(reps = var_8777, x = x_327_cast_fp16)[name = string("x_329_cast_fp16")]; tensor var_8789 = const()[name = string("op_8789"), val = tensor([1, -1, 1024, 128])]; tensor key_states_67_cast_fp16 = reshape(shape = var_8789, x = x_329_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor x_333_axes_0 = const()[name = string("x_333_axes_0"), val = tensor([1])]; tensor x_333_cast_fp16 = expand_dims(axes = x_333_axes_0, x = V_layer_cache_33_cast_fp16)[name = string("x_333_cast_fp16")]; tensor var_8797 = const()[name = string("op_8797"), val = tensor([1, 4, 1, 1])]; tensor x_335_cast_fp16 = tile(reps = var_8797, x = x_333_cast_fp16)[name = string("x_335_cast_fp16")]; tensor var_8809 = const()[name = string("op_8809"), val = tensor([1, -1, 1024, 128])]; tensor value_states_99_cast_fp16 = reshape(shape = var_8809, x = x_335_cast_fp16)[name = string("value_states_99_cast_fp16")]; bool var_8824_transpose_x_1 = const()[name = string("op_8824_transpose_x_1"), val = bool(false)]; bool var_8824_transpose_y_1 = const()[name = string("op_8824_transpose_y_1"), val = bool(true)]; tensor var_8824 = matmul(transpose_x = var_8824_transpose_x_1, transpose_y = var_8824_transpose_y_1, x = query_states_65, y = key_states_67_cast_fp16)[name = string("op_8824")]; fp16 var_8825_to_fp16 = const()[name = string("op_8825_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_97_cast_fp16 = mul(x = var_8824, y = var_8825_to_fp16)[name = string("attn_weights_97_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; int32 var_8860 = const()[name = string("op_8860"), val = int32(-1)]; tensor attn_weights_101_cast_fp16 = softmax(axis = var_8860, x = attn_weights_99_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; bool attn_output_161_transpose_x_0 = const()[name = string("attn_output_161_transpose_x_0"), val = bool(false)]; bool attn_output_161_transpose_y_0 = const()[name = string("attn_output_161_transpose_y_0"), val = bool(false)]; tensor attn_output_161_cast_fp16 = matmul(transpose_x = attn_output_161_transpose_x_0, transpose_y = attn_output_161_transpose_y_0, x = attn_weights_101_cast_fp16, y = value_states_99_cast_fp16)[name = string("attn_output_161_cast_fp16")]; tensor var_8871_perm_0 = const()[name = string("op_8871_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8875 = const()[name = string("op_8875"), val = tensor([1, 1, 4096])]; tensor var_8871_cast_fp16 = transpose(perm = var_8871_perm_0, x = attn_output_161_cast_fp16)[name = string("transpose_10")]; tensor attn_output_165_cast_fp16 = reshape(shape = var_8875, x = var_8871_cast_fp16)[name = string("attn_output_165_cast_fp16")]; tensor var_8880 = const()[name = string("op_8880"), val = tensor([0, 2, 1])]; string var_8896_pad_type_0 = const()[name = string("op_8896_pad_type_0"), val = string("valid")]; int32 var_8896_groups_0 = const()[name = string("op_8896_groups_0"), val = int32(1)]; tensor var_8896_strides_0 = const()[name = string("op_8896_strides_0"), val = tensor([1])]; tensor var_8896_pad_0 = const()[name = string("op_8896_pad_0"), val = tensor([0, 0])]; tensor var_8896_dilations_0 = const()[name = string("op_8896_dilations_0"), val = tensor([1])]; tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933787008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939029952))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8881_cast_fp16 = transpose(perm = var_8880, x = attn_output_165_cast_fp16)[name = string("transpose_9")]; tensor var_8896_cast_fp16 = conv(dilations = var_8896_dilations_0, groups = var_8896_groups_0, pad = var_8896_pad_0, pad_type = var_8896_pad_type_0, strides = var_8896_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_8881_cast_fp16)[name = string("op_8896_cast_fp16")]; tensor var_8900 = const()[name = string("op_8900"), val = tensor([0, 2, 1])]; tensor attn_output_169_cast_fp16 = transpose(perm = var_8900, x = var_8896_cast_fp16)[name = string("transpose_8")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor mean_135_axes_0 = const()[name = string("mean_135_axes_0"), val = tensor([-1])]; bool mean_135_keep_dims_0 = const()[name = string("mean_135_keep_dims_0"), val = bool(true)]; tensor mean_135_cast_fp16 = reduce_mean(axes = mean_135_axes_0, keep_dims = mean_135_keep_dims_0, x = hidden_states_101_cast_fp16)[name = string("mean_135_cast_fp16")]; tensor input_299_cast_fp16 = sub(x = hidden_states_101_cast_fp16, y = mean_135_cast_fp16)[name = string("input_299_cast_fp16")]; tensor var_8919_axes_0 = const()[name = string("op_8919_axes_0"), val = tensor([-1])]; tensor model_model_layers_34_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_34_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939040256)))]; fp16 var_8907_to_fp16 = const()[name = string("op_8907_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8919_cast_fp16 = layer_norm(axes = var_8919_axes_0, epsilon = var_8907_to_fp16, gamma = model_model_layers_34_post_attention_layernorm_weight_to_fp16, x = input_299_cast_fp16)[name = string("op_8919_cast_fp16")]; tensor var_8933 = const()[name = string("op_8933"), val = tensor([0, 2, 1])]; tensor input_301_axes_0 = const()[name = string("input_301_axes_0"), val = tensor([2])]; tensor var_8934 = transpose(perm = var_8933, x = var_8919_cast_fp16)[name = string("transpose_7")]; tensor input_301 = expand_dims(axes = input_301_axes_0, x = var_8934)[name = string("input_301")]; string input_303_pad_type_0 = const()[name = string("input_303_pad_type_0"), val = string("valid")]; tensor input_303_strides_0 = const()[name = string("input_303_strides_0"), val = tensor([1, 1])]; tensor input_303_pad_0 = const()[name = string("input_303_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_303_dilations_0 = const()[name = string("input_303_dilations_0"), val = tensor([1, 1])]; int32 input_303_groups_0 = const()[name = string("input_303_groups_0"), val = int32(1)]; tensor input_303 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_model_layers_34_mlp_gate_proj_weight_palettized, x = input_301)[name = string("input_303")]; string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; tensor b_33 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_34_mlp_up_proj_weight_palettized, x = input_301)[name = string("b_33")]; tensor c_33 = silu(x = input_303)[name = string("c_33")]; tensor input_305 = mul(x = c_33, y = b_33)[name = string("input_305")]; string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; tensor e_33 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_34_mlp_down_proj_weight_palettized, x = input_305)[name = string("e_33")]; tensor var_8956_axes_0 = const()[name = string("op_8956_axes_0"), val = tensor([2])]; tensor var_8956 = squeeze(axes = var_8956_axes_0, x = e_33)[name = string("op_8956")]; tensor var_8957 = const()[name = string("op_8957"), val = tensor([0, 2, 1])]; tensor var_8958 = transpose(perm = var_8957, x = var_8956)[name = string("transpose_6")]; tensor hidden_states_103_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = var_8958)[name = string("hidden_states_103_cast_fp16")]; tensor mean_137_axes_0 = const()[name = string("mean_137_axes_0"), val = tensor([-1])]; bool mean_137_keep_dims_0 = const()[name = string("mean_137_keep_dims_0"), val = bool(true)]; tensor mean_137_cast_fp16 = reduce_mean(axes = mean_137_axes_0, keep_dims = mean_137_keep_dims_0, x = hidden_states_103_cast_fp16)[name = string("mean_137_cast_fp16")]; tensor input_307_cast_fp16 = sub(x = hidden_states_103_cast_fp16, y = mean_137_cast_fp16)[name = string("input_307_cast_fp16")]; tensor var_8976_axes_0 = const()[name = string("op_8976_axes_0"), val = tensor([-1])]; tensor model_model_layers_35_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_35_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939045440)))]; fp16 var_8964_to_fp16 = const()[name = string("op_8964_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8976_cast_fp16 = layer_norm(axes = var_8976_axes_0, epsilon = var_8964_to_fp16, gamma = model_model_layers_35_input_layernorm_weight_to_fp16, x = input_307_cast_fp16)[name = string("op_8976_cast_fp16")]; tensor var_8982 = const()[name = string("op_8982"), val = tensor([0, 2, 1])]; tensor var_8985_axes_0 = const()[name = string("op_8985_axes_0"), val = tensor([2])]; tensor var_8983 = transpose(perm = var_8982, x = var_8976_cast_fp16)[name = string("transpose_5")]; tensor var_8985 = expand_dims(axes = var_8985_axes_0, x = var_8983)[name = string("op_8985")]; string var_9001_pad_type_0 = const()[name = string("op_9001_pad_type_0"), val = string("valid")]; tensor var_9001_strides_0 = const()[name = string("op_9001_strides_0"), val = tensor([1, 1])]; tensor var_9001_pad_0 = const()[name = string("op_9001_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9001_dilations_0 = const()[name = string("op_9001_dilations_0"), val = tensor([1, 1])]; int32 var_9001_groups_0 = const()[name = string("op_9001_groups_0"), val = int32(1)]; tensor var_9001 = conv(dilations = var_9001_dilations_0, groups = var_9001_groups_0, pad = var_9001_pad_0, pad_type = var_9001_pad_type_0, strides = var_9001_strides_0, weight = model_model_layers_35_self_attn_q_proj_weight_palettized, x = var_8985)[name = string("op_9001")]; tensor var_9006 = const()[name = string("op_9006"), val = tensor([1, 32, 1, 128])]; tensor var_9007 = reshape(shape = var_9006, x = var_9001)[name = string("op_9007")]; string var_9023_pad_type_0 = const()[name = string("op_9023_pad_type_0"), val = string("valid")]; tensor var_9023_strides_0 = const()[name = string("op_9023_strides_0"), val = tensor([1, 1])]; tensor var_9023_pad_0 = const()[name = string("op_9023_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9023_dilations_0 = const()[name = string("op_9023_dilations_0"), val = tensor([1, 1])]; int32 var_9023_groups_0 = const()[name = string("op_9023_groups_0"), val = int32(1)]; tensor var_9023 = conv(dilations = var_9023_dilations_0, groups = var_9023_groups_0, pad = var_9023_pad_0, pad_type = var_9023_pad_type_0, strides = var_9023_strides_0, weight = model_model_layers_35_self_attn_k_proj_weight_palettized, x = var_8985)[name = string("op_9023")]; tensor var_9028 = const()[name = string("op_9028"), val = tensor([1, 8, 1, 128])]; tensor var_9029 = reshape(shape = var_9028, x = var_9023)[name = string("op_9029")]; string var_9045_pad_type_0 = const()[name = string("op_9045_pad_type_0"), val = string("valid")]; tensor var_9045_strides_0 = const()[name = string("op_9045_strides_0"), val = tensor([1, 1])]; tensor var_9045_pad_0 = const()[name = string("op_9045_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9045_dilations_0 = const()[name = string("op_9045_dilations_0"), val = tensor([1, 1])]; int32 var_9045_groups_0 = const()[name = string("op_9045_groups_0"), val = int32(1)]; tensor var_9045 = conv(dilations = var_9045_dilations_0, groups = var_9045_groups_0, pad = var_9045_pad_0, pad_type = var_9045_pad_type_0, strides = var_9045_strides_0, weight = model_model_layers_35_self_attn_v_proj_weight_palettized, x = var_8985)[name = string("op_9045")]; tensor var_9050 = const()[name = string("op_9050"), val = tensor([1, 8, 1, 128])]; tensor var_9051 = reshape(shape = var_9050, x = var_9045)[name = string("op_9051")]; tensor mean_139_axes_0 = const()[name = string("mean_139_axes_0"), val = tensor([-1])]; bool mean_139_keep_dims_0 = const()[name = string("mean_139_keep_dims_0"), val = bool(true)]; tensor mean_139 = reduce_mean(axes = mean_139_axes_0, keep_dims = mean_139_keep_dims_0, x = var_9007)[name = string("mean_139")]; tensor input_311 = sub(x = var_9007, y = mean_139)[name = string("input_311")]; tensor var_9072_axes_0 = const()[name = string("op_9072_axes_0"), val = tensor([-1])]; tensor model_model_layers_35_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_35_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939050624)))]; fp16 var_9060_to_fp16 = const()[name = string("op_9060_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9072_cast_fp16 = layer_norm(axes = var_9072_axes_0, epsilon = var_9060_to_fp16, gamma = model_model_layers_35_self_attn_q_norm_weight_to_fp16, x = input_311)[name = string("op_9072_cast_fp16")]; tensor mean_141_axes_0 = const()[name = string("mean_141_axes_0"), val = tensor([-1])]; bool mean_141_keep_dims_0 = const()[name = string("mean_141_keep_dims_0"), val = bool(true)]; tensor mean_141 = reduce_mean(axes = mean_141_axes_0, keep_dims = mean_141_keep_dims_0, x = var_9029)[name = string("mean_141")]; tensor input_313 = sub(x = var_9029, y = mean_141)[name = string("input_313")]; tensor var_9090_axes_0 = const()[name = string("op_9090_axes_0"), val = tensor([-1])]; tensor model_model_layers_35_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_35_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939050944)))]; fp16 var_9078_to_fp16 = const()[name = string("op_9078_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9090_cast_fp16 = layer_norm(axes = var_9090_axes_0, epsilon = var_9078_to_fp16, gamma = model_model_layers_35_self_attn_k_norm_weight_to_fp16, x = input_313)[name = string("op_9090_cast_fp16")]; tensor var_9093 = mul(x = var_9072_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9093")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = var_9072_cast_fp16)[name = string("x1_69")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = var_9072_cast_fp16)[name = string("x2_69")]; fp16 const_311_promoted = const()[name = string("const_311_promoted"), val = fp16(-0x1p+0)]; tensor var_9114 = mul(x = x2_69, y = const_311_promoted)[name = string("op_9114")]; int32 var_9116 = const()[name = string("op_9116"), val = int32(-1)]; bool var_9117_interleave_0 = const()[name = string("op_9117_interleave_0"), val = bool(false)]; tensor var_9117 = concat(axis = var_9116, interleave = var_9117_interleave_0, values = (var_9114, x1_69))[name = string("op_9117")]; tensor var_9118 = mul(x = var_9117, y = sin_1_cast_fp16)[name = string("op_9118")]; tensor query_states_69 = add(x = var_9093, y = var_9118)[name = string("query_states_69")]; tensor var_9121 = mul(x = var_9090_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9121")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_9090_cast_fp16)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_9090_cast_fp16)[name = string("x2")]; fp16 const_314_promoted = const()[name = string("const_314_promoted"), val = fp16(-0x1p+0)]; tensor var_9142 = mul(x = x2, y = const_314_promoted)[name = string("op_9142")]; int32 var_9144 = const()[name = string("op_9144"), val = int32(-1)]; bool var_9145_interleave_0 = const()[name = string("op_9145_interleave_0"), val = bool(false)]; tensor var_9145 = concat(axis = var_9144, interleave = var_9145_interleave_0, values = (var_9142, x1))[name = string("op_9145")]; tensor var_9146 = mul(x = var_9145, y = sin_1_cast_fp16)[name = string("op_9146")]; tensor key_states_69 = add(x = var_9121, y = var_9146)[name = string("key_states_69")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([35])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([36])]; int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (expand_dims_204, expand_dims_205, current_pos, expand_dims_207))[name = string("concat_138")]; tensor concat_139_values1_0 = const()[name = string("concat_139_values1_0"), val = tensor([0])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_208, concat_139_values1_0, var_1195, concat_139_values3_0))[name = string("concat_139")]; tensor model_model_kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_138, begin_mask = model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_139, end_mask = model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_35_stride_0, update = key_states_69, x = coreml_update_state_69)[name = string("model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_34")]; tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([71])]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([0])]; tensor expand_dims_213 = const()[name = string("expand_dims_213"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([72])]; int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (expand_dims_210, expand_dims_211, current_pos, expand_dims_213))[name = string("concat_142")]; tensor concat_143_values1_0 = const()[name = string("concat_143_values1_0"), val = tensor([0])]; tensor concat_143_values3_0 = const()[name = string("concat_143_values3_0"), val = tensor([0])]; int32 concat_143_axis_0 = const()[name = string("concat_143_axis_0"), val = int32(0)]; bool concat_143_interleave_0 = const()[name = string("concat_143_interleave_0"), val = bool(false)]; tensor concat_143 = concat(axis = concat_143_axis_0, interleave = concat_143_interleave_0, values = (expand_dims_214, concat_143_values1_0, var_1195, concat_143_values3_0))[name = string("concat_143")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_142, begin_mask = model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_143, end_mask = model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_36_stride_0, update = var_9051, x = coreml_update_state_70)[name = string("model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_35")]; tensor var_9201_begin_0 = const()[name = string("op_9201_begin_0"), val = tensor([35, 0, 0, 0])]; tensor var_9201_end_0 = const()[name = string("op_9201_end_0"), val = tensor([36, 8, 1024, 128])]; tensor var_9201_end_mask_0 = const()[name = string("op_9201_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9201_cast_fp16 = slice_by_index(begin = var_9201_begin_0, end = var_9201_end_0, end_mask = var_9201_end_mask_0, x = coreml_update_state_71)[name = string("op_9201_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_9201_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_9208_begin_0 = const()[name = string("op_9208_begin_0"), val = tensor([71, 0, 0, 0])]; tensor var_9208_end_0 = const()[name = string("op_9208_end_0"), val = tensor([1, 8, 1024, 128])]; tensor var_9208_end_mask_0 = const()[name = string("op_9208_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9208_cast_fp16 = slice_by_index(begin = var_9208_begin_0, end = var_9208_end_0, end_mask = var_9208_end_mask_0, x = coreml_update_state_71)[name = string("op_9208_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_9208_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_347_axes_0 = const()[name = string("x_347_axes_0"), val = tensor([1])]; tensor x_347_cast_fp16 = expand_dims(axes = x_347_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_347_cast_fp16")]; tensor var_9245 = const()[name = string("op_9245"), val = tensor([1, 4, 1, 1])]; tensor x_349_cast_fp16 = tile(reps = var_9245, x = x_347_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_9257 = const()[name = string("op_9257"), val = tensor([1, -1, 1024, 128])]; tensor key_states_cast_fp16 = reshape(shape = var_9257, x = x_349_cast_fp16)[name = string("key_states_cast_fp16")]; tensor x_353_axes_0 = const()[name = string("x_353_axes_0"), val = tensor([1])]; tensor x_353_cast_fp16 = expand_dims(axes = x_353_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_353_cast_fp16")]; tensor var_9265 = const()[name = string("op_9265"), val = tensor([1, 4, 1, 1])]; tensor x_355_cast_fp16 = tile(reps = var_9265, x = x_353_cast_fp16)[name = string("x_355_cast_fp16")]; tensor var_9277 = const()[name = string("op_9277"), val = tensor([1, -1, 1024, 128])]; tensor value_states_105_cast_fp16 = reshape(shape = var_9277, x = x_355_cast_fp16)[name = string("value_states_105_cast_fp16")]; bool var_9292_transpose_x_1 = const()[name = string("op_9292_transpose_x_1"), val = bool(false)]; bool var_9292_transpose_y_1 = const()[name = string("op_9292_transpose_y_1"), val = bool(true)]; tensor var_9292 = matmul(transpose_x = var_9292_transpose_x_1, transpose_y = var_9292_transpose_y_1, x = query_states_69, y = key_states_cast_fp16)[name = string("op_9292")]; fp16 var_9293_to_fp16 = const()[name = string("op_9293_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_103_cast_fp16 = mul(x = var_9292, y = var_9293_to_fp16)[name = string("attn_weights_103_cast_fp16")]; tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; int32 var_9328 = const()[name = string("op_9328"), val = int32(-1)]; tensor attn_weights_cast_fp16 = softmax(axis = var_9328, x = attn_weights_105_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_171_transpose_x_0 = const()[name = string("attn_output_171_transpose_x_0"), val = bool(false)]; bool attn_output_171_transpose_y_0 = const()[name = string("attn_output_171_transpose_y_0"), val = bool(false)]; tensor attn_output_171_cast_fp16 = matmul(transpose_x = attn_output_171_transpose_x_0, transpose_y = attn_output_171_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_105_cast_fp16)[name = string("attn_output_171_cast_fp16")]; tensor var_9339_perm_0 = const()[name = string("op_9339_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9343 = const()[name = string("op_9343"), val = tensor([1, 1, 4096])]; tensor var_9339_cast_fp16 = transpose(perm = var_9339_perm_0, x = attn_output_171_cast_fp16)[name = string("transpose_4")]; tensor attn_output_175_cast_fp16 = reshape(shape = var_9343, x = var_9339_cast_fp16)[name = string("attn_output_175_cast_fp16")]; tensor var_9348 = const()[name = string("op_9348"), val = tensor([0, 2, 1])]; string var_9364_pad_type_0 = const()[name = string("op_9364_pad_type_0"), val = string("valid")]; int32 var_9364_groups_0 = const()[name = string("op_9364_groups_0"), val = int32(1)]; tensor var_9364_strides_0 = const()[name = string("op_9364_strides_0"), val = tensor([1])]; tensor var_9364_pad_0 = const()[name = string("op_9364_pad_0"), val = tensor([0, 0])]; tensor var_9364_dilations_0 = const()[name = string("op_9364_dilations_0"), val = tensor([1])]; tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939051264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944294208))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9349_cast_fp16 = transpose(perm = var_9348, x = attn_output_175_cast_fp16)[name = string("transpose_3")]; tensor var_9364_cast_fp16 = conv(dilations = var_9364_dilations_0, groups = var_9364_groups_0, pad = var_9364_pad_0, pad_type = var_9364_pad_type_0, strides = var_9364_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_9349_cast_fp16)[name = string("op_9364_cast_fp16")]; tensor var_9368 = const()[name = string("op_9368"), val = tensor([0, 2, 1])]; tensor attn_output_cast_fp16 = transpose(perm = var_9368, x = var_9364_cast_fp16)[name = string("transpose_2")]; tensor hidden_states_107_cast_fp16 = add(x = hidden_states_103_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor mean_143_axes_0 = const()[name = string("mean_143_axes_0"), val = tensor([-1])]; bool mean_143_keep_dims_0 = const()[name = string("mean_143_keep_dims_0"), val = bool(true)]; tensor mean_143_cast_fp16 = reduce_mean(axes = mean_143_axes_0, keep_dims = mean_143_keep_dims_0, x = hidden_states_107_cast_fp16)[name = string("mean_143_cast_fp16")]; tensor input_317_cast_fp16 = sub(x = hidden_states_107_cast_fp16, y = mean_143_cast_fp16)[name = string("input_317_cast_fp16")]; tensor var_9387_axes_0 = const()[name = string("op_9387_axes_0"), val = tensor([-1])]; tensor model_model_layers_35_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_35_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944304512)))]; fp16 var_9375_to_fp16 = const()[name = string("op_9375_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9387_cast_fp16 = layer_norm(axes = var_9387_axes_0, epsilon = var_9375_to_fp16, gamma = model_model_layers_35_post_attention_layernorm_weight_to_fp16, x = input_317_cast_fp16)[name = string("op_9387_cast_fp16")]; tensor var_9401 = const()[name = string("op_9401"), val = tensor([0, 2, 1])]; tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; tensor var_9402 = transpose(perm = var_9401, x = var_9387_cast_fp16)[name = string("transpose_1")]; tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_9402)[name = string("input_319")]; string input_321_pad_type_0 = const()[name = string("input_321_pad_type_0"), val = string("valid")]; tensor input_321_strides_0 = const()[name = string("input_321_strides_0"), val = tensor([1, 1])]; tensor input_321_pad_0 = const()[name = string("input_321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_321_dilations_0 = const()[name = string("input_321_dilations_0"), val = tensor([1, 1])]; int32 input_321_groups_0 = const()[name = string("input_321_groups_0"), val = int32(1)]; tensor input_321 = conv(dilations = input_321_dilations_0, groups = input_321_groups_0, pad = input_321_pad_0, pad_type = input_321_pad_type_0, strides = input_321_strides_0, weight = model_model_layers_35_mlp_gate_proj_weight_palettized, x = input_319)[name = string("input_321")]; string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_35_mlp_up_proj_weight_palettized, x = input_319)[name = string("b")]; tensor c = silu(x = input_321)[name = string("c")]; tensor input_323 = mul(x = c, y = b)[name = string("input_323")]; string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; tensor e = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_35_mlp_down_proj_weight_palettized, x = input_323)[name = string("e")]; tensor var_9424_axes_0 = const()[name = string("op_9424_axes_0"), val = tensor([2])]; tensor var_9424 = squeeze(axes = var_9424_axes_0, x = e)[name = string("op_9424")]; tensor var_9425 = const()[name = string("op_9425"), val = tensor([0, 2, 1])]; tensor var_9426 = transpose(perm = var_9425, x = var_9424)[name = string("transpose_0")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = var_9426)[name = string("hidden_states_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_cast_fp16 = sub(x = hidden_states_cast_fp16, y = mean_cast_fp16)[name = string("input_cast_fp16")]; tensor var_9444_axes_0 = const()[name = string("op_9444_axes_0"), val = tensor([-1])]; tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944309696)))]; fp16 var_9432_to_fp16 = const()[name = string("op_9432_to_fp16"), val = fp16(0x1.1p-20)]; tensor output_hidden_states = layer_norm(axes = var_9444_axes_0, epsilon = var_9432_to_fp16, gamma = model_model_norm_weight_to_fp16, x = input_cast_fp16)[name = string("op_9444_cast_fp16")]; tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; } -> (output_hidden_states); func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5243008))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5259456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6570240))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6574400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7885184))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7889344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20341248))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20380224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32832128))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")]; tensor model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32871104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45323008))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45333312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50576256))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50592704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51903488))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51907648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53218432))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53222592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65674496))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65713472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78165376))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")]; tensor model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78204352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90656256))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90666560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95909504))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95925952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97236736))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97240896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98551680))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98555840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111007744))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111046720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123498624))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")]; tensor model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123537600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135989504))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135999808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141242752))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141259200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142569984))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142574144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143884928))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143889088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156340992))))[name = string("model_model_layers_21_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156379968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168831872))))[name = string("model_model_layers_21_mlp_up_proj_weight_palettized")]; tensor model_model_layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168870848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181322752))))[name = string("model_model_layers_21_mlp_down_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181333056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186576000))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186592448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187903232))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187907392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189218176))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189222336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201674240))))[name = string("model_model_layers_22_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201713216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214165120))))[name = string("model_model_layers_22_mlp_up_proj_weight_palettized")]; tensor model_model_layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214204096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226656000))))[name = string("model_model_layers_22_mlp_down_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226666304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231909248))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231925696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233236480))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233240640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234551424))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234555584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247007488))))[name = string("model_model_layers_23_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247046464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259498368))))[name = string("model_model_layers_23_mlp_up_proj_weight_palettized")]; tensor model_model_layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259537344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271989248))))[name = string("model_model_layers_23_mlp_down_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271999552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277242496))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277258944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278569728))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278573888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279884672))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279888832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292340736))))[name = string("model_model_layers_24_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292379712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304831616))))[name = string("model_model_layers_24_mlp_up_proj_weight_palettized")]; tensor model_model_layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304870592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317322496))))[name = string("model_model_layers_24_mlp_down_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317332800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322575744))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322592192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323902976))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323907136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325217920))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325222080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337673984))))[name = string("model_model_layers_25_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337712960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350164864))))[name = string("model_model_layers_25_mlp_up_proj_weight_palettized")]; tensor model_model_layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350203840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362655744))))[name = string("model_model_layers_25_mlp_down_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362666048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367908992))))[name = string("model_model_layers_26_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367925440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369236224))))[name = string("model_model_layers_26_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369240384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370551168))))[name = string("model_model_layers_26_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370555328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383007232))))[name = string("model_model_layers_26_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383046208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395498112))))[name = string("model_model_layers_26_mlp_up_proj_weight_palettized")]; tensor model_model_layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395537088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407988992))))[name = string("model_model_layers_26_mlp_down_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407999296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413242240))))[name = string("model_model_layers_27_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413258688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414569472))))[name = string("model_model_layers_27_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414573632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415884416))))[name = string("model_model_layers_27_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_27_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415888576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428340480))))[name = string("model_model_layers_27_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_27_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428379456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440831360))))[name = string("model_model_layers_27_mlp_up_proj_weight_palettized")]; tensor model_model_layers_27_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440870336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453322240))))[name = string("model_model_layers_27_mlp_down_proj_weight_palettized")]; tensor model_model_layers_28_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453332544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458575488))))[name = string("model_model_layers_28_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_28_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458591936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459902720))))[name = string("model_model_layers_28_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_28_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459906880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461217664))))[name = string("model_model_layers_28_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_28_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461221824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473673728))))[name = string("model_model_layers_28_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_28_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473712704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486164608))))[name = string("model_model_layers_28_mlp_up_proj_weight_palettized")]; tensor model_model_layers_28_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486203584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498655488))))[name = string("model_model_layers_28_mlp_down_proj_weight_palettized")]; tensor model_model_layers_29_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498665792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503908736))))[name = string("model_model_layers_29_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_29_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503925184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505235968))))[name = string("model_model_layers_29_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_29_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505240128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506550912))))[name = string("model_model_layers_29_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_29_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506555072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519006976))))[name = string("model_model_layers_29_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_29_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519045952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531497856))))[name = string("model_model_layers_29_mlp_up_proj_weight_palettized")]; tensor model_model_layers_29_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531536832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543988736))))[name = string("model_model_layers_29_mlp_down_proj_weight_palettized")]; tensor model_model_layers_30_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543999040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549241984))))[name = string("model_model_layers_30_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_30_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549258432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550569216))))[name = string("model_model_layers_30_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_30_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550573376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551884160))))[name = string("model_model_layers_30_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_30_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551888320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564340224))))[name = string("model_model_layers_30_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_30_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564379200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576831104))))[name = string("model_model_layers_30_mlp_up_proj_weight_palettized")]; tensor model_model_layers_30_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576870080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589321984))))[name = string("model_model_layers_30_mlp_down_proj_weight_palettized")]; tensor model_model_layers_31_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589332288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594575232))))[name = string("model_model_layers_31_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_31_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594591680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595902464))))[name = string("model_model_layers_31_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_31_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595906624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597217408))))[name = string("model_model_layers_31_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_31_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597221568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609673472))))[name = string("model_model_layers_31_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_31_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609712448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622164352))))[name = string("model_model_layers_31_mlp_up_proj_weight_palettized")]; tensor model_model_layers_31_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622203328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634655232))))[name = string("model_model_layers_31_mlp_down_proj_weight_palettized")]; tensor model_model_layers_32_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634665536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639908480))))[name = string("model_model_layers_32_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_32_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639924928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641235712))))[name = string("model_model_layers_32_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_32_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641239872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642550656))))[name = string("model_model_layers_32_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_32_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642554816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655006720))))[name = string("model_model_layers_32_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_32_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655045696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667497600))))[name = string("model_model_layers_32_mlp_up_proj_weight_palettized")]; tensor model_model_layers_32_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667536576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679988480))))[name = string("model_model_layers_32_mlp_down_proj_weight_palettized")]; tensor model_model_layers_33_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685241728))))[name = string("model_model_layers_33_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_33_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685258176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686568960))))[name = string("model_model_layers_33_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_33_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686573120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687883904))))[name = string("model_model_layers_33_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_33_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687888064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700339968))))[name = string("model_model_layers_33_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_33_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700378944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712830848))))[name = string("model_model_layers_33_mlp_up_proj_weight_palettized")]; tensor model_model_layers_33_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712869824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725321728))))[name = string("model_model_layers_33_mlp_down_proj_weight_palettized")]; tensor model_model_layers_34_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725332032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730574976))))[name = string("model_model_layers_34_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_34_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730591424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731902208))))[name = string("model_model_layers_34_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_34_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731906368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(733217152))))[name = string("model_model_layers_34_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_34_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(733221312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745673216))))[name = string("model_model_layers_34_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_34_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745712192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758164096))))[name = string("model_model_layers_34_mlp_up_proj_weight_palettized")]; tensor model_model_layers_34_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758203072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770654976))))[name = string("model_model_layers_34_mlp_down_proj_weight_palettized")]; tensor model_model_layers_35_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770665280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775908224))))[name = string("model_model_layers_35_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_35_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775924672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(777235456))))[name = string("model_model_layers_35_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_35_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(777239616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778550400))))[name = string("model_model_layers_35_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_35_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778554560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791006464))))[name = string("model_model_layers_35_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_35_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791045440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803497344))))[name = string("model_model_layers_35_mlp_up_proj_weight_palettized")]; tensor model_model_layers_35_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803536320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815988224))))[name = string("model_model_layers_35_mlp_down_proj_weight_palettized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(65536)]; tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_978_axis_0 = const()[name = string("op_978_axis_0"), val = int32(1)]; int32 var_978_batch_dims_0 = const()[name = string("op_978_batch_dims_0"), val = int32(0)]; bool var_978_validate_indices_0 = const()[name = string("op_978_validate_indices_0"), val = bool(false)]; tensor var_970_to_fp16 = const()[name = string("op_970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832775808)))]; tensor var_978_cast_fp16 = gather(axis = var_978_axis_0, batch_dims = var_978_batch_dims_0, indices = select_0, validate_indices = var_978_validate_indices_0, x = var_970_to_fp16)[name = string("op_978_cast_fp16")]; tensor var_982 = const()[name = string("op_982"), val = tensor([1, 64, 1, 128])]; tensor cos_1_cast_fp16 = reshape(shape = var_982, x = var_978_cast_fp16)[name = string("cos_1_cast_fp16")]; int32 var_992_axis_0 = const()[name = string("op_992_axis_0"), val = int32(1)]; int32 var_992_batch_dims_0 = const()[name = string("op_992_batch_dims_0"), val = int32(0)]; bool var_992_validate_indices_0 = const()[name = string("op_992_validate_indices_0"), val = bool(false)]; tensor var_984_to_fp16 = const()[name = string("op_984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815998528)))]; tensor var_992_cast_fp16 = gather(axis = var_992_axis_0, batch_dims = var_992_batch_dims_0, indices = select_0, validate_indices = var_992_validate_indices_0, x = var_984_to_fp16)[name = string("op_992_cast_fp16")]; tensor var_996 = const()[name = string("op_996"), val = tensor([1, 64, 1, 128])]; tensor sin_1_cast_fp16 = reshape(shape = var_996, x = var_992_cast_fp16)[name = string("sin_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_1023_axes_0 = const()[name = string("op_1023_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849553088)))]; fp16 var_1011_to_fp16 = const()[name = string("op_1011_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1023_cast_fp16 = layer_norm(axes = var_1023_axes_0, epsilon = var_1011_to_fp16, gamma = model_model_layers_18_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_1023_cast_fp16")]; tensor var_1035 = const()[name = string("op_1035"), val = tensor([0, 2, 1])]; tensor var_1038_axes_0 = const()[name = string("op_1038_axes_0"), val = tensor([2])]; tensor var_1036 = transpose(perm = var_1035, x = var_1023_cast_fp16)[name = string("transpose_163")]; tensor var_1038 = expand_dims(axes = var_1038_axes_0, x = var_1036)[name = string("op_1038")]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_1038)[name = string("query_states_1")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_1038)[name = string("key_states_1")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_1038)[name = string("value_states_1")]; tensor var_1080 = const()[name = string("op_1080"), val = tensor([1, 32, 128, 64])]; tensor var_1081 = reshape(shape = var_1080, x = query_states_1)[name = string("op_1081")]; tensor var_1086 = const()[name = string("op_1086"), val = tensor([0, 1, 3, 2])]; tensor var_1091 = const()[name = string("op_1091"), val = tensor([1, 8, 128, 64])]; tensor var_1092 = reshape(shape = var_1091, x = key_states_1)[name = string("op_1092")]; tensor var_1097 = const()[name = string("op_1097"), val = tensor([0, 1, 3, 2])]; tensor var_1102 = const()[name = string("op_1102"), val = tensor([1, 8, 128, 64])]; tensor var_1103 = reshape(shape = var_1102, x = value_states_1)[name = string("op_1103")]; tensor var_1108 = const()[name = string("op_1108"), val = tensor([0, 1, 3, 2])]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor x_1 = transpose(perm = var_1086, x = var_1081)[name = string("transpose_162")]; tensor mean_3 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = x_1)[name = string("mean_3")]; tensor input_5 = sub(x = x_1, y = mean_3)[name = string("input_5")]; tensor var_1125_axes_0 = const()[name = string("op_1125_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558272)))]; fp16 var_1113_to_fp16 = const()[name = string("op_1113_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1125_cast_fp16 = layer_norm(axes = var_1125_axes_0, epsilon = var_1113_to_fp16, gamma = model_model_layers_18_self_attn_q_norm_weight_to_fp16, x = input_5)[name = string("op_1125_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor x_3 = transpose(perm = var_1097, x = var_1092)[name = string("transpose_161")]; tensor mean_5 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = x_3)[name = string("mean_5")]; tensor input_7 = sub(x = x_3, y = mean_5)[name = string("input_7")]; tensor var_1143_axes_0 = const()[name = string("op_1143_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558592)))]; fp16 var_1131_to_fp16 = const()[name = string("op_1131_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1143_cast_fp16 = layer_norm(axes = var_1143_axes_0, epsilon = var_1131_to_fp16, gamma = model_model_layers_18_self_attn_k_norm_weight_to_fp16, x = input_7)[name = string("op_1143_cast_fp16")]; tensor var_1150 = const()[name = string("op_1150"), val = tensor([0, 2, 1, 3])]; tensor var_1156 = const()[name = string("op_1156"), val = tensor([0, 2, 1, 3])]; tensor cos_5 = transpose(perm = var_1150, x = cos_1_cast_fp16)[name = string("transpose_160")]; tensor var_1158 = mul(x = var_1125_cast_fp16, y = cos_5)[name = string("op_1158")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_1125_cast_fp16)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_1125_cast_fp16)[name = string("x2_1")]; fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)]; tensor var_1179 = mul(x = x2_1, y = const_7_promoted)[name = string("op_1179")]; int32 var_1181 = const()[name = string("op_1181"), val = int32(-1)]; bool var_1182_interleave_0 = const()[name = string("op_1182_interleave_0"), val = bool(false)]; tensor var_1182 = concat(axis = var_1181, interleave = var_1182_interleave_0, values = (var_1179, x1_1))[name = string("op_1182")]; tensor sin_5 = transpose(perm = var_1156, x = sin_1_cast_fp16)[name = string("transpose_159")]; tensor var_1183 = mul(x = var_1182, y = sin_5)[name = string("op_1183")]; tensor query_states_3 = add(x = var_1158, y = var_1183)[name = string("query_states_3")]; tensor var_1186 = mul(x = var_1143_cast_fp16, y = cos_5)[name = string("op_1186")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_1143_cast_fp16)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_1143_cast_fp16)[name = string("x2_3")]; fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; tensor var_1207 = mul(x = x2_3, y = const_10_promoted)[name = string("op_1207")]; int32 var_1209 = const()[name = string("op_1209"), val = int32(-1)]; bool var_1210_interleave_0 = const()[name = string("op_1210_interleave_0"), val = bool(false)]; tensor var_1210 = concat(axis = var_1209, interleave = var_1210_interleave_0, values = (var_1207, x1_3))[name = string("op_1210")]; tensor var_1211 = mul(x = var_1210, y = sin_5)[name = string("op_1211")]; tensor key_states_3 = add(x = var_1186, y = var_1211)[name = string("key_states_3")]; tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; tensor var_1233 = add(x = current_pos, y = seq_length_1)[name = string("op_1233")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([18])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([19])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1233, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = key_states_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_36")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([54])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([55])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1233, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3 = transpose(perm = var_1108, x = var_1103)[name = string("transpose_158")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_36)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_37")]; tensor var_1282_begin_0 = const()[name = string("op_1282_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_1282_end_0 = const()[name = string("op_1282_end_0"), val = tensor([19, 8, 1024, 128])]; tensor var_1282_end_mask_0 = const()[name = string("op_1282_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1282_cast_fp16 = slice_by_index(begin = var_1282_begin_0, end = var_1282_end_0, end_mask = var_1282_end_mask_0, x = coreml_update_state_37)[name = string("op_1282_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_1282_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_1289_begin_0 = const()[name = string("op_1289_begin_0"), val = tensor([54, 0, 0, 0])]; tensor var_1289_end_0 = const()[name = string("op_1289_end_0"), val = tensor([55, 8, 1024, 128])]; tensor var_1289_end_mask_0 = const()[name = string("op_1289_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1289_cast_fp16 = slice_by_index(begin = var_1289_begin_0, end = var_1289_end_0, end_mask = var_1289_end_mask_0, x = coreml_update_state_37)[name = string("op_1289_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_1289_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_7_axes_0 = const()[name = string("x_7_axes_0"), val = tensor([1])]; tensor x_7_cast_fp16 = expand_dims(axes = x_7_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_1318 = const()[name = string("op_1318"), val = tensor([1, 4, 1, 1])]; tensor x_9_cast_fp16 = tile(reps = var_1318, x = x_7_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, -1, 1024, 128])]; tensor key_states_7_cast_fp16 = reshape(shape = var_1330, x = x_9_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_13_axes_0 = const()[name = string("x_13_axes_0"), val = tensor([1])]; tensor x_13_cast_fp16 = expand_dims(axes = x_13_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_1338 = const()[name = string("op_1338"), val = tensor([1, 4, 1, 1])]; tensor x_15_cast_fp16 = tile(reps = var_1338, x = x_13_cast_fp16)[name = string("x_15_cast_fp16")]; bool var_1365_transpose_x_0 = const()[name = string("op_1365_transpose_x_0"), val = bool(false)]; bool var_1365_transpose_y_0 = const()[name = string("op_1365_transpose_y_0"), val = bool(true)]; tensor var_1365 = matmul(transpose_x = var_1365_transpose_x_0, transpose_y = var_1365_transpose_y_0, x = query_states_3, y = key_states_7_cast_fp16)[name = string("op_1365")]; fp16 var_1366_to_fp16 = const()[name = string("op_1366_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_1365, y = var_1366_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_1401 = const()[name = string("op_1401"), val = int32(-1)]; tensor var_1403_cast_fp16 = softmax(axis = var_1401, x = attn_weights_3_cast_fp16)[name = string("op_1403_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([32, 64, 1024])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_1403_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([32, 1024, 128])]; tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_15_cast_fp16)[name = string("reshape_1_cast_fp16")]; bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 32, 64, 128])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor var_1415_perm_0 = const()[name = string("op_1415_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1434 = const()[name = string("op_1434"), val = tensor([1, 64, 4096])]; tensor var_1415_cast_fp16 = transpose(perm = var_1415_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_157")]; tensor attn_output_5_cast_fp16 = reshape(shape = var_1434, x = var_1415_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_1439 = const()[name = string("op_1439"), val = tensor([0, 2, 1])]; string var_1455_pad_type_0 = const()[name = string("op_1455_pad_type_0"), val = string("valid")]; int32 var_1455_groups_0 = const()[name = string("op_1455_groups_0"), val = int32(1)]; tensor var_1455_strides_0 = const()[name = string("op_1455_strides_0"), val = tensor([1])]; tensor var_1455_pad_0 = const()[name = string("op_1455_pad_0"), val = tensor([0, 0])]; tensor var_1455_dilations_0 = const()[name = string("op_1455_dilations_0"), val = tensor([1])]; tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849558912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854801856))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1440_cast_fp16 = transpose(perm = var_1439, x = attn_output_5_cast_fp16)[name = string("transpose_156")]; tensor var_1455_cast_fp16 = conv(dilations = var_1455_dilations_0, groups = var_1455_groups_0, pad = var_1455_pad_0, pad_type = var_1455_pad_type_0, strides = var_1455_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1440_cast_fp16)[name = string("op_1455_cast_fp16")]; tensor var_1459 = const()[name = string("op_1459"), val = tensor([0, 2, 1])]; tensor attn_output_9_cast_fp16 = transpose(perm = var_1459, x = var_1455_cast_fp16)[name = string("transpose_155")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_11_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_7_cast_fp16)[name = string("input_11_cast_fp16")]; tensor var_1478_axes_0 = const()[name = string("op_1478_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854812160)))]; fp16 var_1466_to_fp16 = const()[name = string("op_1466_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1478_cast_fp16 = layer_norm(axes = var_1478_axes_0, epsilon = var_1466_to_fp16, gamma = model_model_layers_18_post_attention_layernorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("op_1478_cast_fp16")]; tensor var_1492 = const()[name = string("op_1492"), val = tensor([0, 2, 1])]; tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; tensor var_1493 = transpose(perm = var_1492, x = var_1478_cast_fp16)[name = string("transpose_154")]; tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_1493)[name = string("input_13")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor input_15 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_13)[name = string("input_15")]; string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_13)[name = string("b_1")]; tensor c_1 = silu(x = input_15)[name = string("c_1")]; tensor input_17 = mul(x = c_1, y = b_1)[name = string("input_17")]; string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; tensor e_1 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_17)[name = string("e_1")]; tensor var_1515_axes_0 = const()[name = string("op_1515_axes_0"), val = tensor([2])]; tensor var_1515 = squeeze(axes = var_1515_axes_0, x = e_1)[name = string("op_1515")]; tensor var_1516 = const()[name = string("op_1516"), val = tensor([0, 2, 1])]; tensor var_1517 = transpose(perm = var_1516, x = var_1515)[name = string("transpose_153")]; tensor hidden_states_7_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_1517)[name = string("hidden_states_7_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_7_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_19_cast_fp16 = sub(x = hidden_states_7_cast_fp16, y = mean_9_cast_fp16)[name = string("input_19_cast_fp16")]; tensor var_1535_axes_0 = const()[name = string("op_1535_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854817344)))]; fp16 var_1523_to_fp16 = const()[name = string("op_1523_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1535_cast_fp16 = layer_norm(axes = var_1535_axes_0, epsilon = var_1523_to_fp16, gamma = model_model_layers_19_input_layernorm_weight_to_fp16, x = input_19_cast_fp16)[name = string("op_1535_cast_fp16")]; tensor var_1547 = const()[name = string("op_1547"), val = tensor([0, 2, 1])]; tensor var_1550_axes_0 = const()[name = string("op_1550_axes_0"), val = tensor([2])]; tensor var_1548 = transpose(perm = var_1547, x = var_1535_cast_fp16)[name = string("transpose_152")]; tensor var_1550 = expand_dims(axes = var_1550_axes_0, x = var_1548)[name = string("op_1550")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_1550)[name = string("query_states_9")]; string key_states_11_pad_type_0 = const()[name = string("key_states_11_pad_type_0"), val = string("valid")]; tensor key_states_11_strides_0 = const()[name = string("key_states_11_strides_0"), val = tensor([1, 1])]; tensor key_states_11_pad_0 = const()[name = string("key_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_11_dilations_0 = const()[name = string("key_states_11_dilations_0"), val = tensor([1, 1])]; int32 key_states_11_groups_0 = const()[name = string("key_states_11_groups_0"), val = int32(1)]; tensor key_states_11 = conv(dilations = key_states_11_dilations_0, groups = key_states_11_groups_0, pad = key_states_11_pad_0, pad_type = key_states_11_pad_type_0, strides = key_states_11_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_1550)[name = string("key_states_11")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor value_states_9 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_1550)[name = string("value_states_9")]; tensor var_1592 = const()[name = string("op_1592"), val = tensor([1, 32, 128, 64])]; tensor var_1593 = reshape(shape = var_1592, x = query_states_9)[name = string("op_1593")]; tensor var_1598 = const()[name = string("op_1598"), val = tensor([0, 1, 3, 2])]; tensor var_1603 = const()[name = string("op_1603"), val = tensor([1, 8, 128, 64])]; tensor var_1604 = reshape(shape = var_1603, x = key_states_11)[name = string("op_1604")]; tensor var_1609 = const()[name = string("op_1609"), val = tensor([0, 1, 3, 2])]; tensor var_1614 = const()[name = string("op_1614"), val = tensor([1, 8, 128, 64])]; tensor var_1615 = reshape(shape = var_1614, x = value_states_9)[name = string("op_1615")]; tensor var_1620 = const()[name = string("op_1620"), val = tensor([0, 1, 3, 2])]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor x_21 = transpose(perm = var_1598, x = var_1593)[name = string("transpose_151")]; tensor mean_11 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = x_21)[name = string("mean_11")]; tensor input_23 = sub(x = x_21, y = mean_11)[name = string("input_23")]; tensor var_1637_axes_0 = const()[name = string("op_1637_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854822528)))]; fp16 var_1625_to_fp16 = const()[name = string("op_1625_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1637_cast_fp16 = layer_norm(axes = var_1637_axes_0, epsilon = var_1625_to_fp16, gamma = model_model_layers_19_self_attn_q_norm_weight_to_fp16, x = input_23)[name = string("op_1637_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor x_23 = transpose(perm = var_1609, x = var_1604)[name = string("transpose_150")]; tensor mean_13 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = x_23)[name = string("mean_13")]; tensor input_25 = sub(x = x_23, y = mean_13)[name = string("input_25")]; tensor var_1655_axes_0 = const()[name = string("op_1655_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854822848)))]; fp16 var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1655_cast_fp16 = layer_norm(axes = var_1655_axes_0, epsilon = var_1643_to_fp16, gamma = model_model_layers_19_self_attn_k_norm_weight_to_fp16, x = input_25)[name = string("op_1655_cast_fp16")]; tensor var_1670 = mul(x = var_1637_cast_fp16, y = cos_5)[name = string("op_1670")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_1637_cast_fp16)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_1637_cast_fp16)[name = string("x2_5")]; fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; tensor var_1691 = mul(x = x2_5, y = const_29_promoted)[name = string("op_1691")]; int32 var_1693 = const()[name = string("op_1693"), val = int32(-1)]; bool var_1694_interleave_0 = const()[name = string("op_1694_interleave_0"), val = bool(false)]; tensor var_1694 = concat(axis = var_1693, interleave = var_1694_interleave_0, values = (var_1691, x1_5))[name = string("op_1694")]; tensor var_1695 = mul(x = var_1694, y = sin_5)[name = string("op_1695")]; tensor query_states_11 = add(x = var_1670, y = var_1695)[name = string("query_states_11")]; tensor var_1698 = mul(x = var_1655_cast_fp16, y = cos_5)[name = string("op_1698")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_1655_cast_fp16)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_1655_cast_fp16)[name = string("x2_7")]; fp16 const_32_promoted = const()[name = string("const_32_promoted"), val = fp16(-0x1p+0)]; tensor var_1719 = mul(x = x2_7, y = const_32_promoted)[name = string("op_1719")]; int32 var_1721 = const()[name = string("op_1721"), val = int32(-1)]; bool var_1722_interleave_0 = const()[name = string("op_1722_interleave_0"), val = bool(false)]; tensor var_1722 = concat(axis = var_1721, interleave = var_1722_interleave_0, values = (var_1719, x1_7))[name = string("op_1722")]; tensor var_1723 = mul(x = var_1722, y = sin_5)[name = string("op_1723")]; tensor key_states_13 = add(x = var_1698, y = var_1723)[name = string("key_states_13")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([19])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([20])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_1233, concat_21_values3_0))[name = string("concat_21")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = key_states_13, x = coreml_update_state_37)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_38")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([55])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([56])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_1233, concat_25_values3_0))[name = string("concat_25")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11 = transpose(perm = var_1620, x = var_1615)[name = string("transpose_149")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_11, x = coreml_update_state_38)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_39")]; tensor var_1794_begin_0 = const()[name = string("op_1794_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_1794_end_0 = const()[name = string("op_1794_end_0"), val = tensor([20, 8, 1024, 128])]; tensor var_1794_end_mask_0 = const()[name = string("op_1794_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1794_cast_fp16 = slice_by_index(begin = var_1794_begin_0, end = var_1794_end_0, end_mask = var_1794_end_mask_0, x = coreml_update_state_39)[name = string("op_1794_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_1794_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_1801_begin_0 = const()[name = string("op_1801_begin_0"), val = tensor([55, 0, 0, 0])]; tensor var_1801_end_0 = const()[name = string("op_1801_end_0"), val = tensor([56, 8, 1024, 128])]; tensor var_1801_end_mask_0 = const()[name = string("op_1801_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1801_cast_fp16 = slice_by_index(begin = var_1801_begin_0, end = var_1801_end_0, end_mask = var_1801_end_mask_0, x = coreml_update_state_39)[name = string("op_1801_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_1801_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_27_axes_0 = const()[name = string("x_27_axes_0"), val = tensor([1])]; tensor x_27_cast_fp16 = expand_dims(axes = x_27_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_1830 = const()[name = string("op_1830"), val = tensor([1, 4, 1, 1])]; tensor x_29_cast_fp16 = tile(reps = var_1830, x = x_27_cast_fp16)[name = string("x_29_cast_fp16")]; tensor var_1842 = const()[name = string("op_1842"), val = tensor([1, -1, 1024, 128])]; tensor key_states_17_cast_fp16 = reshape(shape = var_1842, x = x_29_cast_fp16)[name = string("key_states_17_cast_fp16")]; tensor x_33_axes_0 = const()[name = string("x_33_axes_0"), val = tensor([1])]; tensor x_33_cast_fp16 = expand_dims(axes = x_33_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_1850 = const()[name = string("op_1850"), val = tensor([1, 4, 1, 1])]; tensor x_35_cast_fp16 = tile(reps = var_1850, x = x_33_cast_fp16)[name = string("x_35_cast_fp16")]; bool var_1877_transpose_x_0 = const()[name = string("op_1877_transpose_x_0"), val = bool(false)]; bool var_1877_transpose_y_0 = const()[name = string("op_1877_transpose_y_0"), val = bool(true)]; tensor var_1877 = matmul(transpose_x = var_1877_transpose_x_0, transpose_y = var_1877_transpose_y_0, x = query_states_11, y = key_states_17_cast_fp16)[name = string("op_1877")]; fp16 var_1878_to_fp16 = const()[name = string("op_1878_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_5_cast_fp16 = mul(x = var_1877, y = var_1878_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("attn_weights_7_cast_fp16")]; int32 var_1913 = const()[name = string("op_1913"), val = int32(-1)]; tensor var_1915_cast_fp16 = softmax(axis = var_1913, x = attn_weights_7_cast_fp16)[name = string("op_1915_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([32, 64, 1024])]; tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_1915_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([32, 1024, 128])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_35_cast_fp16)[name = string("reshape_4_cast_fp16")]; bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 32, 64, 128])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor var_1927_perm_0 = const()[name = string("op_1927_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1946 = const()[name = string("op_1946"), val = tensor([1, 64, 4096])]; tensor var_1927_cast_fp16 = transpose(perm = var_1927_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_148")]; tensor attn_output_15_cast_fp16 = reshape(shape = var_1946, x = var_1927_cast_fp16)[name = string("attn_output_15_cast_fp16")]; tensor var_1951 = const()[name = string("op_1951"), val = tensor([0, 2, 1])]; string var_1967_pad_type_0 = const()[name = string("op_1967_pad_type_0"), val = string("valid")]; int32 var_1967_groups_0 = const()[name = string("op_1967_groups_0"), val = int32(1)]; tensor var_1967_strides_0 = const()[name = string("op_1967_strides_0"), val = tensor([1])]; tensor var_1967_pad_0 = const()[name = string("op_1967_pad_0"), val = tensor([0, 0])]; tensor var_1967_dilations_0 = const()[name = string("op_1967_dilations_0"), val = tensor([1])]; tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854823168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860066112))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1952_cast_fp16 = transpose(perm = var_1951, x = attn_output_15_cast_fp16)[name = string("transpose_147")]; tensor var_1967_cast_fp16 = conv(dilations = var_1967_dilations_0, groups = var_1967_groups_0, pad = var_1967_pad_0, pad_type = var_1967_pad_type_0, strides = var_1967_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1952_cast_fp16)[name = string("op_1967_cast_fp16")]; tensor var_1971 = const()[name = string("op_1971"), val = tensor([0, 2, 1])]; tensor attn_output_19_cast_fp16 = transpose(perm = var_1971, x = var_1967_cast_fp16)[name = string("transpose_146")]; tensor hidden_states_11_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_11_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_11_cast_fp16, y = mean_15_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_1990_axes_0 = const()[name = string("op_1990_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860076416)))]; fp16 var_1978_to_fp16 = const()[name = string("op_1978_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1990_cast_fp16 = layer_norm(axes = var_1990_axes_0, epsilon = var_1978_to_fp16, gamma = model_model_layers_19_post_attention_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_1990_cast_fp16")]; tensor var_2004 = const()[name = string("op_2004"), val = tensor([0, 2, 1])]; tensor input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor([2])]; tensor var_2005 = transpose(perm = var_2004, x = var_1990_cast_fp16)[name = string("transpose_145")]; tensor input_31 = expand_dims(axes = input_31_axes_0, x = var_2005)[name = string("input_31")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor input_33 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_31)[name = string("input_33")]; string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_31)[name = string("b_3")]; tensor c_3 = silu(x = input_33)[name = string("c_3")]; tensor input_35 = mul(x = c_3, y = b_3)[name = string("input_35")]; string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; tensor e_3 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_35)[name = string("e_3")]; tensor var_2027_axes_0 = const()[name = string("op_2027_axes_0"), val = tensor([2])]; tensor var_2027 = squeeze(axes = var_2027_axes_0, x = e_3)[name = string("op_2027")]; tensor var_2028 = const()[name = string("op_2028"), val = tensor([0, 2, 1])]; tensor var_2029 = transpose(perm = var_2028, x = var_2027)[name = string("transpose_144")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = var_2029)[name = string("hidden_states_13_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_37_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_17_cast_fp16)[name = string("input_37_cast_fp16")]; tensor var_2047_axes_0 = const()[name = string("op_2047_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860081600)))]; fp16 var_2035_to_fp16 = const()[name = string("op_2035_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2047_cast_fp16 = layer_norm(axes = var_2047_axes_0, epsilon = var_2035_to_fp16, gamma = model_model_layers_20_input_layernorm_weight_to_fp16, x = input_37_cast_fp16)[name = string("op_2047_cast_fp16")]; tensor var_2059 = const()[name = string("op_2059"), val = tensor([0, 2, 1])]; tensor var_2062_axes_0 = const()[name = string("op_2062_axes_0"), val = tensor([2])]; tensor var_2060 = transpose(perm = var_2059, x = var_2047_cast_fp16)[name = string("transpose_143")]; tensor var_2062 = expand_dims(axes = var_2062_axes_0, x = var_2060)[name = string("op_2062")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_2062)[name = string("query_states_17")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor key_states_21 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_2062)[name = string("key_states_21")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor value_states_17 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_2062)[name = string("value_states_17")]; tensor var_2104 = const()[name = string("op_2104"), val = tensor([1, 32, 128, 64])]; tensor var_2105 = reshape(shape = var_2104, x = query_states_17)[name = string("op_2105")]; tensor var_2110 = const()[name = string("op_2110"), val = tensor([0, 1, 3, 2])]; tensor var_2115 = const()[name = string("op_2115"), val = tensor([1, 8, 128, 64])]; tensor var_2116 = reshape(shape = var_2115, x = key_states_21)[name = string("op_2116")]; tensor var_2121 = const()[name = string("op_2121"), val = tensor([0, 1, 3, 2])]; tensor var_2126 = const()[name = string("op_2126"), val = tensor([1, 8, 128, 64])]; tensor var_2127 = reshape(shape = var_2126, x = value_states_17)[name = string("op_2127")]; tensor var_2132 = const()[name = string("op_2132"), val = tensor([0, 1, 3, 2])]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor x_41 = transpose(perm = var_2110, x = var_2105)[name = string("transpose_142")]; tensor mean_19 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = x_41)[name = string("mean_19")]; tensor input_41 = sub(x = x_41, y = mean_19)[name = string("input_41")]; tensor var_2149_axes_0 = const()[name = string("op_2149_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860086784)))]; fp16 var_2137_to_fp16 = const()[name = string("op_2137_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2149_cast_fp16 = layer_norm(axes = var_2149_axes_0, epsilon = var_2137_to_fp16, gamma = model_model_layers_20_self_attn_q_norm_weight_to_fp16, x = input_41)[name = string("op_2149_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor x_43 = transpose(perm = var_2121, x = var_2116)[name = string("transpose_141")]; tensor mean_21 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = x_43)[name = string("mean_21")]; tensor input_43 = sub(x = x_43, y = mean_21)[name = string("input_43")]; tensor var_2167_axes_0 = const()[name = string("op_2167_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860087104)))]; fp16 var_2155_to_fp16 = const()[name = string("op_2155_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2167_cast_fp16 = layer_norm(axes = var_2167_axes_0, epsilon = var_2155_to_fp16, gamma = model_model_layers_20_self_attn_k_norm_weight_to_fp16, x = input_43)[name = string("op_2167_cast_fp16")]; tensor var_2182 = mul(x = var_2149_cast_fp16, y = cos_5)[name = string("op_2182")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_2149_cast_fp16)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_2149_cast_fp16)[name = string("x2_9")]; fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; tensor var_2203 = mul(x = x2_9, y = const_51_promoted)[name = string("op_2203")]; int32 var_2205 = const()[name = string("op_2205"), val = int32(-1)]; bool var_2206_interleave_0 = const()[name = string("op_2206_interleave_0"), val = bool(false)]; tensor var_2206 = concat(axis = var_2205, interleave = var_2206_interleave_0, values = (var_2203, x1_9))[name = string("op_2206")]; tensor var_2207 = mul(x = var_2206, y = sin_5)[name = string("op_2207")]; tensor query_states_19 = add(x = var_2182, y = var_2207)[name = string("query_states_19")]; tensor var_2210 = mul(x = var_2167_cast_fp16, y = cos_5)[name = string("op_2210")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_2167_cast_fp16)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_2167_cast_fp16)[name = string("x2_11")]; fp16 const_54_promoted = const()[name = string("const_54_promoted"), val = fp16(-0x1p+0)]; tensor var_2231 = mul(x = x2_11, y = const_54_promoted)[name = string("op_2231")]; int32 var_2233 = const()[name = string("op_2233"), val = int32(-1)]; bool var_2234_interleave_0 = const()[name = string("op_2234_interleave_0"), val = bool(false)]; tensor var_2234 = concat(axis = var_2233, interleave = var_2234_interleave_0, values = (var_2231, x1_11))[name = string("op_2234")]; tensor var_2235 = mul(x = var_2234, y = sin_5)[name = string("op_2235")]; tensor key_states_23 = add(x = var_2210, y = var_2235)[name = string("key_states_23")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([20])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([21])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_1233, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = key_states_23, x = coreml_update_state_39)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_40")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([56])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([57])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_1233, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19 = transpose(perm = var_2132, x = var_2127)[name = string("transpose_140")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_19, x = coreml_update_state_40)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_41")]; tensor var_2306_begin_0 = const()[name = string("op_2306_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_2306_end_0 = const()[name = string("op_2306_end_0"), val = tensor([21, 8, 1024, 128])]; tensor var_2306_end_mask_0 = const()[name = string("op_2306_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = coreml_update_state_41)[name = string("op_2306_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_2306_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_2313_begin_0 = const()[name = string("op_2313_begin_0"), val = tensor([56, 0, 0, 0])]; tensor var_2313_end_0 = const()[name = string("op_2313_end_0"), val = tensor([57, 8, 1024, 128])]; tensor var_2313_end_mask_0 = const()[name = string("op_2313_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2313_cast_fp16 = slice_by_index(begin = var_2313_begin_0, end = var_2313_end_0, end_mask = var_2313_end_mask_0, x = coreml_update_state_41)[name = string("op_2313_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_2313_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_47_axes_0 = const()[name = string("x_47_axes_0"), val = tensor([1])]; tensor x_47_cast_fp16 = expand_dims(axes = x_47_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_2342 = const()[name = string("op_2342"), val = tensor([1, 4, 1, 1])]; tensor x_49_cast_fp16 = tile(reps = var_2342, x = x_47_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_2354 = const()[name = string("op_2354"), val = tensor([1, -1, 1024, 128])]; tensor key_states_27_cast_fp16 = reshape(shape = var_2354, x = x_49_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_53_axes_0 = const()[name = string("x_53_axes_0"), val = tensor([1])]; tensor x_53_cast_fp16 = expand_dims(axes = x_53_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_53_cast_fp16")]; tensor var_2362 = const()[name = string("op_2362"), val = tensor([1, 4, 1, 1])]; tensor x_55_cast_fp16 = tile(reps = var_2362, x = x_53_cast_fp16)[name = string("x_55_cast_fp16")]; bool var_2389_transpose_x_0 = const()[name = string("op_2389_transpose_x_0"), val = bool(false)]; bool var_2389_transpose_y_0 = const()[name = string("op_2389_transpose_y_0"), val = bool(true)]; tensor var_2389 = matmul(transpose_x = var_2389_transpose_x_0, transpose_y = var_2389_transpose_y_0, x = query_states_19, y = key_states_27_cast_fp16)[name = string("op_2389")]; fp16 var_2390_to_fp16 = const()[name = string("op_2390_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_9_cast_fp16 = mul(x = var_2389, y = var_2390_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor attn_weights_11_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("attn_weights_11_cast_fp16")]; int32 var_2425 = const()[name = string("op_2425"), val = int32(-1)]; tensor var_2427_cast_fp16 = softmax(axis = var_2425, x = attn_weights_11_cast_fp16)[name = string("op_2427_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([32, 64, 1024])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_2427_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([32, 1024, 128])]; tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_55_cast_fp16)[name = string("reshape_7_cast_fp16")]; bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 32, 64, 128])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor var_2439_perm_0 = const()[name = string("op_2439_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2458 = const()[name = string("op_2458"), val = tensor([1, 64, 4096])]; tensor var_2439_cast_fp16 = transpose(perm = var_2439_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_139")]; tensor attn_output_25_cast_fp16 = reshape(shape = var_2458, x = var_2439_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_2463 = const()[name = string("op_2463"), val = tensor([0, 2, 1])]; string var_2479_pad_type_0 = const()[name = string("op_2479_pad_type_0"), val = string("valid")]; int32 var_2479_groups_0 = const()[name = string("op_2479_groups_0"), val = int32(1)]; tensor var_2479_strides_0 = const()[name = string("op_2479_strides_0"), val = tensor([1])]; tensor var_2479_pad_0 = const()[name = string("op_2479_pad_0"), val = tensor([0, 0])]; tensor var_2479_dilations_0 = const()[name = string("op_2479_dilations_0"), val = tensor([1])]; tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860087424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865330368))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2464_cast_fp16 = transpose(perm = var_2463, x = attn_output_25_cast_fp16)[name = string("transpose_138")]; tensor var_2479_cast_fp16 = conv(dilations = var_2479_dilations_0, groups = var_2479_groups_0, pad = var_2479_pad_0, pad_type = var_2479_pad_type_0, strides = var_2479_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2464_cast_fp16)[name = string("op_2479_cast_fp16")]; tensor var_2483 = const()[name = string("op_2483"), val = tensor([0, 2, 1])]; tensor attn_output_29_cast_fp16 = transpose(perm = var_2483, x = var_2479_cast_fp16)[name = string("transpose_137")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_47_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_23_cast_fp16)[name = string("input_47_cast_fp16")]; tensor var_2502_axes_0 = const()[name = string("op_2502_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865340672)))]; fp16 var_2490_to_fp16 = const()[name = string("op_2490_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2502_cast_fp16 = layer_norm(axes = var_2502_axes_0, epsilon = var_2490_to_fp16, gamma = model_model_layers_20_post_attention_layernorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("op_2502_cast_fp16")]; tensor var_2516 = const()[name = string("op_2516"), val = tensor([0, 2, 1])]; tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; tensor var_2517 = transpose(perm = var_2516, x = var_2502_cast_fp16)[name = string("transpose_136")]; tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_2517)[name = string("input_49")]; string input_51_pad_type_0 = const()[name = string("input_51_pad_type_0"), val = string("valid")]; tensor input_51_strides_0 = const()[name = string("input_51_strides_0"), val = tensor([1, 1])]; tensor input_51_pad_0 = const()[name = string("input_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_51_dilations_0 = const()[name = string("input_51_dilations_0"), val = tensor([1, 1])]; int32 input_51_groups_0 = const()[name = string("input_51_groups_0"), val = int32(1)]; tensor input_51 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_49)[name = string("input_51")]; string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_49)[name = string("b_5")]; tensor c_5 = silu(x = input_51)[name = string("c_5")]; tensor input_53 = mul(x = c_5, y = b_5)[name = string("input_53")]; string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; tensor e_5 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input_53)[name = string("e_5")]; tensor var_2539_axes_0 = const()[name = string("op_2539_axes_0"), val = tensor([2])]; tensor var_2539 = squeeze(axes = var_2539_axes_0, x = e_5)[name = string("op_2539")]; tensor var_2540 = const()[name = string("op_2540"), val = tensor([0, 2, 1])]; tensor var_2541 = transpose(perm = var_2540, x = var_2539)[name = string("transpose_135")]; tensor hidden_states_19_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = var_2541)[name = string("hidden_states_19_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_19_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_55_cast_fp16 = sub(x = hidden_states_19_cast_fp16, y = mean_25_cast_fp16)[name = string("input_55_cast_fp16")]; tensor var_2559_axes_0 = const()[name = string("op_2559_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865345856)))]; fp16 var_2547_to_fp16 = const()[name = string("op_2547_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2559_cast_fp16 = layer_norm(axes = var_2559_axes_0, epsilon = var_2547_to_fp16, gamma = model_model_layers_21_input_layernorm_weight_to_fp16, x = input_55_cast_fp16)[name = string("op_2559_cast_fp16")]; tensor var_2571 = const()[name = string("op_2571"), val = tensor([0, 2, 1])]; tensor var_2574_axes_0 = const()[name = string("op_2574_axes_0"), val = tensor([2])]; tensor var_2572 = transpose(perm = var_2571, x = var_2559_cast_fp16)[name = string("transpose_134")]; tensor var_2574 = expand_dims(axes = var_2574_axes_0, x = var_2572)[name = string("op_2574")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_2574)[name = string("query_states_25")]; string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_2574)[name = string("key_states_31")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_2574)[name = string("value_states_25")]; tensor var_2616 = const()[name = string("op_2616"), val = tensor([1, 32, 128, 64])]; tensor var_2617 = reshape(shape = var_2616, x = query_states_25)[name = string("op_2617")]; tensor var_2622 = const()[name = string("op_2622"), val = tensor([0, 1, 3, 2])]; tensor var_2627 = const()[name = string("op_2627"), val = tensor([1, 8, 128, 64])]; tensor var_2628 = reshape(shape = var_2627, x = key_states_31)[name = string("op_2628")]; tensor var_2633 = const()[name = string("op_2633"), val = tensor([0, 1, 3, 2])]; tensor var_2638 = const()[name = string("op_2638"), val = tensor([1, 8, 128, 64])]; tensor var_2639 = reshape(shape = var_2638, x = value_states_25)[name = string("op_2639")]; tensor var_2644 = const()[name = string("op_2644"), val = tensor([0, 1, 3, 2])]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor x_61 = transpose(perm = var_2622, x = var_2617)[name = string("transpose_133")]; tensor mean_27 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = x_61)[name = string("mean_27")]; tensor input_59 = sub(x = x_61, y = mean_27)[name = string("input_59")]; tensor var_2661_axes_0 = const()[name = string("op_2661_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351040)))]; fp16 var_2649_to_fp16 = const()[name = string("op_2649_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2661_cast_fp16 = layer_norm(axes = var_2661_axes_0, epsilon = var_2649_to_fp16, gamma = model_model_layers_21_self_attn_q_norm_weight_to_fp16, x = input_59)[name = string("op_2661_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor x_63 = transpose(perm = var_2633, x = var_2628)[name = string("transpose_132")]; tensor mean_29 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = x_63)[name = string("mean_29")]; tensor input_61 = sub(x = x_63, y = mean_29)[name = string("input_61")]; tensor var_2679_axes_0 = const()[name = string("op_2679_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351360)))]; fp16 var_2667_to_fp16 = const()[name = string("op_2667_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2679_cast_fp16 = layer_norm(axes = var_2679_axes_0, epsilon = var_2667_to_fp16, gamma = model_model_layers_21_self_attn_k_norm_weight_to_fp16, x = input_61)[name = string("op_2679_cast_fp16")]; tensor var_2694 = mul(x = var_2661_cast_fp16, y = cos_5)[name = string("op_2694")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_2661_cast_fp16)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_2661_cast_fp16)[name = string("x2_13")]; fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; tensor var_2715 = mul(x = x2_13, y = const_73_promoted)[name = string("op_2715")]; int32 var_2717 = const()[name = string("op_2717"), val = int32(-1)]; bool var_2718_interleave_0 = const()[name = string("op_2718_interleave_0"), val = bool(false)]; tensor var_2718 = concat(axis = var_2717, interleave = var_2718_interleave_0, values = (var_2715, x1_13))[name = string("op_2718")]; tensor var_2719 = mul(x = var_2718, y = sin_5)[name = string("op_2719")]; tensor query_states_27 = add(x = var_2694, y = var_2719)[name = string("query_states_27")]; tensor var_2722 = mul(x = var_2679_cast_fp16, y = cos_5)[name = string("op_2722")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_2679_cast_fp16)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_2679_cast_fp16)[name = string("x2_15")]; fp16 const_76_promoted = const()[name = string("const_76_promoted"), val = fp16(-0x1p+0)]; tensor var_2743 = mul(x = x2_15, y = const_76_promoted)[name = string("op_2743")]; int32 var_2745 = const()[name = string("op_2745"), val = int32(-1)]; bool var_2746_interleave_0 = const()[name = string("op_2746_interleave_0"), val = bool(false)]; tensor var_2746 = concat(axis = var_2745, interleave = var_2746_interleave_0, values = (var_2743, x1_15))[name = string("op_2746")]; tensor var_2747 = mul(x = var_2746, y = sin_5)[name = string("op_2747")]; tensor key_states_33 = add(x = var_2722, y = var_2747)[name = string("key_states_33")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([21])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([22])]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_1233, concat_57_values3_0))[name = string("concat_57")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = key_states_33, x = coreml_update_state_41)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_42")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([57])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([58])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_1233, concat_61_values3_0))[name = string("concat_61")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27 = transpose(perm = var_2644, x = var_2639)[name = string("transpose_131")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_27, x = coreml_update_state_42)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_43")]; tensor var_2818_begin_0 = const()[name = string("op_2818_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_2818_end_0 = const()[name = string("op_2818_end_0"), val = tensor([22, 8, 1024, 128])]; tensor var_2818_end_mask_0 = const()[name = string("op_2818_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2818_cast_fp16 = slice_by_index(begin = var_2818_begin_0, end = var_2818_end_0, end_mask = var_2818_end_mask_0, x = coreml_update_state_43)[name = string("op_2818_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_2818_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_2825_begin_0 = const()[name = string("op_2825_begin_0"), val = tensor([57, 0, 0, 0])]; tensor var_2825_end_0 = const()[name = string("op_2825_end_0"), val = tensor([58, 8, 1024, 128])]; tensor var_2825_end_mask_0 = const()[name = string("op_2825_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2825_cast_fp16 = slice_by_index(begin = var_2825_begin_0, end = var_2825_end_0, end_mask = var_2825_end_mask_0, x = coreml_update_state_43)[name = string("op_2825_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_2825_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_2854 = const()[name = string("op_2854"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_2854, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_2866 = const()[name = string("op_2866"), val = tensor([1, -1, 1024, 128])]; tensor key_states_37_cast_fp16 = reshape(shape = var_2866, x = x_69_cast_fp16)[name = string("key_states_37_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_2874 = const()[name = string("op_2874"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_2874, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; bool var_2901_transpose_x_0 = const()[name = string("op_2901_transpose_x_0"), val = bool(false)]; bool var_2901_transpose_y_0 = const()[name = string("op_2901_transpose_y_0"), val = bool(true)]; tensor var_2901 = matmul(transpose_x = var_2901_transpose_x_0, transpose_y = var_2901_transpose_y_0, x = query_states_27, y = key_states_37_cast_fp16)[name = string("op_2901")]; fp16 var_2902_to_fp16 = const()[name = string("op_2902_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_2901, y = var_2902_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_2937 = const()[name = string("op_2937"), val = int32(-1)]; tensor var_2939_cast_fp16 = softmax(axis = var_2937, x = attn_weights_15_cast_fp16)[name = string("op_2939_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([32, 64, 1024])]; tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_2939_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([32, 1024, 128])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_75_cast_fp16)[name = string("reshape_10_cast_fp16")]; bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 32, 64, 128])]; tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor var_2951_perm_0 = const()[name = string("op_2951_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2970 = const()[name = string("op_2970"), val = tensor([1, 64, 4096])]; tensor var_2951_cast_fp16 = transpose(perm = var_2951_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_130")]; tensor attn_output_35_cast_fp16 = reshape(shape = var_2970, x = var_2951_cast_fp16)[name = string("attn_output_35_cast_fp16")]; tensor var_2975 = const()[name = string("op_2975"), val = tensor([0, 2, 1])]; string var_2991_pad_type_0 = const()[name = string("op_2991_pad_type_0"), val = string("valid")]; int32 var_2991_groups_0 = const()[name = string("op_2991_groups_0"), val = int32(1)]; tensor var_2991_strides_0 = const()[name = string("op_2991_strides_0"), val = tensor([1])]; tensor var_2991_pad_0 = const()[name = string("op_2991_pad_0"), val = tensor([0, 0])]; tensor var_2991_dilations_0 = const()[name = string("op_2991_dilations_0"), val = tensor([1])]; tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865351680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870594624))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2976_cast_fp16 = transpose(perm = var_2975, x = attn_output_35_cast_fp16)[name = string("transpose_129")]; tensor var_2991_cast_fp16 = conv(dilations = var_2991_dilations_0, groups = var_2991_groups_0, pad = var_2991_pad_0, pad_type = var_2991_pad_type_0, strides = var_2991_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_2976_cast_fp16)[name = string("op_2991_cast_fp16")]; tensor var_2995 = const()[name = string("op_2995"), val = tensor([0, 2, 1])]; tensor attn_output_39_cast_fp16 = transpose(perm = var_2995, x = var_2991_cast_fp16)[name = string("transpose_128")]; tensor hidden_states_23_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_23_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_65_cast_fp16 = sub(x = hidden_states_23_cast_fp16, y = mean_31_cast_fp16)[name = string("input_65_cast_fp16")]; tensor var_3014_axes_0 = const()[name = string("op_3014_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870604928)))]; fp16 var_3002_to_fp16 = const()[name = string("op_3002_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3014_cast_fp16 = layer_norm(axes = var_3014_axes_0, epsilon = var_3002_to_fp16, gamma = model_model_layers_21_post_attention_layernorm_weight_to_fp16, x = input_65_cast_fp16)[name = string("op_3014_cast_fp16")]; tensor var_3028 = const()[name = string("op_3028"), val = tensor([0, 2, 1])]; tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; tensor var_3029 = transpose(perm = var_3028, x = var_3014_cast_fp16)[name = string("transpose_127")]; tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_3029)[name = string("input_67")]; string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; tensor input_69 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_palettized, x = input_67)[name = string("input_69")]; string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_palettized, x = input_67)[name = string("b_7")]; tensor c_7 = silu(x = input_69)[name = string("c_7")]; tensor input_71 = mul(x = c_7, y = b_7)[name = string("input_71")]; string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; tensor e_7 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_palettized, x = input_71)[name = string("e_7")]; tensor var_3051_axes_0 = const()[name = string("op_3051_axes_0"), val = tensor([2])]; tensor var_3051 = squeeze(axes = var_3051_axes_0, x = e_7)[name = string("op_3051")]; tensor var_3052 = const()[name = string("op_3052"), val = tensor([0, 2, 1])]; tensor var_3053 = transpose(perm = var_3052, x = var_3051)[name = string("transpose_126")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_23_cast_fp16, y = var_3053)[name = string("hidden_states_25_cast_fp16")]; tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_33_cast_fp16")]; tensor input_73_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_33_cast_fp16)[name = string("input_73_cast_fp16")]; tensor var_3071_axes_0 = const()[name = string("op_3071_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870610112)))]; fp16 var_3059_to_fp16 = const()[name = string("op_3059_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3071_cast_fp16 = layer_norm(axes = var_3071_axes_0, epsilon = var_3059_to_fp16, gamma = model_model_layers_22_input_layernorm_weight_to_fp16, x = input_73_cast_fp16)[name = string("op_3071_cast_fp16")]; tensor var_3083 = const()[name = string("op_3083"), val = tensor([0, 2, 1])]; tensor var_3086_axes_0 = const()[name = string("op_3086_axes_0"), val = tensor([2])]; tensor var_3084 = transpose(perm = var_3083, x = var_3071_cast_fp16)[name = string("transpose_125")]; tensor var_3086 = expand_dims(axes = var_3086_axes_0, x = var_3084)[name = string("op_3086")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_3086)[name = string("query_states_33")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor key_states_41 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_3086)[name = string("key_states_41")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor value_states_33 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_3086)[name = string("value_states_33")]; tensor var_3128 = const()[name = string("op_3128"), val = tensor([1, 32, 128, 64])]; tensor var_3129 = reshape(shape = var_3128, x = query_states_33)[name = string("op_3129")]; tensor var_3134 = const()[name = string("op_3134"), val = tensor([0, 1, 3, 2])]; tensor var_3139 = const()[name = string("op_3139"), val = tensor([1, 8, 128, 64])]; tensor var_3140 = reshape(shape = var_3139, x = key_states_41)[name = string("op_3140")]; tensor var_3145 = const()[name = string("op_3145"), val = tensor([0, 1, 3, 2])]; tensor var_3150 = const()[name = string("op_3150"), val = tensor([1, 8, 128, 64])]; tensor var_3151 = reshape(shape = var_3150, x = value_states_33)[name = string("op_3151")]; tensor var_3156 = const()[name = string("op_3156"), val = tensor([0, 1, 3, 2])]; tensor mean_35_axes_0 = const()[name = string("mean_35_axes_0"), val = tensor([-1])]; bool mean_35_keep_dims_0 = const()[name = string("mean_35_keep_dims_0"), val = bool(true)]; tensor x_81 = transpose(perm = var_3134, x = var_3129)[name = string("transpose_124")]; tensor mean_35 = reduce_mean(axes = mean_35_axes_0, keep_dims = mean_35_keep_dims_0, x = x_81)[name = string("mean_35")]; tensor input_77 = sub(x = x_81, y = mean_35)[name = string("input_77")]; tensor var_3173_axes_0 = const()[name = string("op_3173_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615296)))]; fp16 var_3161_to_fp16 = const()[name = string("op_3161_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3173_cast_fp16 = layer_norm(axes = var_3173_axes_0, epsilon = var_3161_to_fp16, gamma = model_model_layers_22_self_attn_q_norm_weight_to_fp16, x = input_77)[name = string("op_3173_cast_fp16")]; tensor mean_37_axes_0 = const()[name = string("mean_37_axes_0"), val = tensor([-1])]; bool mean_37_keep_dims_0 = const()[name = string("mean_37_keep_dims_0"), val = bool(true)]; tensor x_83 = transpose(perm = var_3145, x = var_3140)[name = string("transpose_123")]; tensor mean_37 = reduce_mean(axes = mean_37_axes_0, keep_dims = mean_37_keep_dims_0, x = x_83)[name = string("mean_37")]; tensor input_79 = sub(x = x_83, y = mean_37)[name = string("input_79")]; tensor var_3191_axes_0 = const()[name = string("op_3191_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615616)))]; fp16 var_3179_to_fp16 = const()[name = string("op_3179_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3191_cast_fp16 = layer_norm(axes = var_3191_axes_0, epsilon = var_3179_to_fp16, gamma = model_model_layers_22_self_attn_k_norm_weight_to_fp16, x = input_79)[name = string("op_3191_cast_fp16")]; tensor var_3206 = mul(x = var_3173_cast_fp16, y = cos_5)[name = string("op_3206")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_3173_cast_fp16)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_3173_cast_fp16)[name = string("x2_17")]; fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; tensor var_3227 = mul(x = x2_17, y = const_95_promoted)[name = string("op_3227")]; int32 var_3229 = const()[name = string("op_3229"), val = int32(-1)]; bool var_3230_interleave_0 = const()[name = string("op_3230_interleave_0"), val = bool(false)]; tensor var_3230 = concat(axis = var_3229, interleave = var_3230_interleave_0, values = (var_3227, x1_17))[name = string("op_3230")]; tensor var_3231 = mul(x = var_3230, y = sin_5)[name = string("op_3231")]; tensor query_states_35 = add(x = var_3206, y = var_3231)[name = string("query_states_35")]; tensor var_3234 = mul(x = var_3191_cast_fp16, y = cos_5)[name = string("op_3234")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_3191_cast_fp16)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_3191_cast_fp16)[name = string("x2_19")]; fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; tensor var_3255 = mul(x = x2_19, y = const_98_promoted)[name = string("op_3255")]; int32 var_3257 = const()[name = string("op_3257"), val = int32(-1)]; bool var_3258_interleave_0 = const()[name = string("op_3258_interleave_0"), val = bool(false)]; tensor var_3258 = concat(axis = var_3257, interleave = var_3258_interleave_0, values = (var_3255, x1_19))[name = string("op_3258")]; tensor var_3259 = mul(x = var_3258, y = sin_5)[name = string("op_3259")]; tensor key_states_43 = add(x = var_3234, y = var_3259)[name = string("key_states_43")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([22])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([23])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_1233, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = key_states_43, x = coreml_update_state_43)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_44")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([58])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([59])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_1233, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35 = transpose(perm = var_3156, x = var_3151)[name = string("transpose_122")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_35, x = coreml_update_state_44)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_45")]; tensor var_3330_begin_0 = const()[name = string("op_3330_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_3330_end_0 = const()[name = string("op_3330_end_0"), val = tensor([23, 8, 1024, 128])]; tensor var_3330_end_mask_0 = const()[name = string("op_3330_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3330_cast_fp16 = slice_by_index(begin = var_3330_begin_0, end = var_3330_end_0, end_mask = var_3330_end_mask_0, x = coreml_update_state_45)[name = string("op_3330_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_3330_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_3337_begin_0 = const()[name = string("op_3337_begin_0"), val = tensor([58, 0, 0, 0])]; tensor var_3337_end_0 = const()[name = string("op_3337_end_0"), val = tensor([59, 8, 1024, 128])]; tensor var_3337_end_mask_0 = const()[name = string("op_3337_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3337_cast_fp16 = slice_by_index(begin = var_3337_begin_0, end = var_3337_end_0, end_mask = var_3337_end_mask_0, x = coreml_update_state_45)[name = string("op_3337_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_3337_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_87_axes_0 = const()[name = string("x_87_axes_0"), val = tensor([1])]; tensor x_87_cast_fp16 = expand_dims(axes = x_87_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_3366 = const()[name = string("op_3366"), val = tensor([1, 4, 1, 1])]; tensor x_89_cast_fp16 = tile(reps = var_3366, x = x_87_cast_fp16)[name = string("x_89_cast_fp16")]; tensor var_3378 = const()[name = string("op_3378"), val = tensor([1, -1, 1024, 128])]; tensor key_states_47_cast_fp16 = reshape(shape = var_3378, x = x_89_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor x_93_axes_0 = const()[name = string("x_93_axes_0"), val = tensor([1])]; tensor x_93_cast_fp16 = expand_dims(axes = x_93_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_93_cast_fp16")]; tensor var_3386 = const()[name = string("op_3386"), val = tensor([1, 4, 1, 1])]; tensor x_95_cast_fp16 = tile(reps = var_3386, x = x_93_cast_fp16)[name = string("x_95_cast_fp16")]; bool var_3413_transpose_x_0 = const()[name = string("op_3413_transpose_x_0"), val = bool(false)]; bool var_3413_transpose_y_0 = const()[name = string("op_3413_transpose_y_0"), val = bool(true)]; tensor var_3413 = matmul(transpose_x = var_3413_transpose_x_0, transpose_y = var_3413_transpose_y_0, x = query_states_35, y = key_states_47_cast_fp16)[name = string("op_3413")]; fp16 var_3414_to_fp16 = const()[name = string("op_3414_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_17_cast_fp16 = mul(x = var_3413, y = var_3414_to_fp16)[name = string("attn_weights_17_cast_fp16")]; tensor attn_weights_19_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("attn_weights_19_cast_fp16")]; int32 var_3449 = const()[name = string("op_3449"), val = int32(-1)]; tensor var_3451_cast_fp16 = softmax(axis = var_3449, x = attn_weights_19_cast_fp16)[name = string("op_3451_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([32, 64, 1024])]; tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_3451_cast_fp16)[name = string("reshape_12_cast_fp16")]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([32, 1024, 128])]; tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_95_cast_fp16)[name = string("reshape_13_cast_fp16")]; bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 32, 64, 128])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor var_3463_perm_0 = const()[name = string("op_3463_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3482 = const()[name = string("op_3482"), val = tensor([1, 64, 4096])]; tensor var_3463_cast_fp16 = transpose(perm = var_3463_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_121")]; tensor attn_output_45_cast_fp16 = reshape(shape = var_3482, x = var_3463_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_3487 = const()[name = string("op_3487"), val = tensor([0, 2, 1])]; string var_3503_pad_type_0 = const()[name = string("op_3503_pad_type_0"), val = string("valid")]; int32 var_3503_groups_0 = const()[name = string("op_3503_groups_0"), val = int32(1)]; tensor var_3503_strides_0 = const()[name = string("op_3503_strides_0"), val = tensor([1])]; tensor var_3503_pad_0 = const()[name = string("op_3503_pad_0"), val = tensor([0, 0])]; tensor var_3503_dilations_0 = const()[name = string("op_3503_dilations_0"), val = tensor([1])]; tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870615936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875858880))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3488_cast_fp16 = transpose(perm = var_3487, x = attn_output_45_cast_fp16)[name = string("transpose_120")]; tensor var_3503_cast_fp16 = conv(dilations = var_3503_dilations_0, groups = var_3503_groups_0, pad = var_3503_pad_0, pad_type = var_3503_pad_type_0, strides = var_3503_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3488_cast_fp16)[name = string("op_3503_cast_fp16")]; tensor var_3507 = const()[name = string("op_3507"), val = tensor([0, 2, 1])]; tensor attn_output_49_cast_fp16 = transpose(perm = var_3507, x = var_3503_cast_fp16)[name = string("transpose_119")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_39_axes_0 = const()[name = string("mean_39_axes_0"), val = tensor([-1])]; bool mean_39_keep_dims_0 = const()[name = string("mean_39_keep_dims_0"), val = bool(true)]; tensor mean_39_cast_fp16 = reduce_mean(axes = mean_39_axes_0, keep_dims = mean_39_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_39_cast_fp16")]; tensor input_83_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_39_cast_fp16)[name = string("input_83_cast_fp16")]; tensor var_3526_axes_0 = const()[name = string("op_3526_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875869184)))]; fp16 var_3514_to_fp16 = const()[name = string("op_3514_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3526_cast_fp16 = layer_norm(axes = var_3526_axes_0, epsilon = var_3514_to_fp16, gamma = model_model_layers_22_post_attention_layernorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("op_3526_cast_fp16")]; tensor var_3540 = const()[name = string("op_3540"), val = tensor([0, 2, 1])]; tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; tensor var_3541 = transpose(perm = var_3540, x = var_3526_cast_fp16)[name = string("transpose_118")]; tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_3541)[name = string("input_85")]; string input_87_pad_type_0 = const()[name = string("input_87_pad_type_0"), val = string("valid")]; tensor input_87_strides_0 = const()[name = string("input_87_strides_0"), val = tensor([1, 1])]; tensor input_87_pad_0 = const()[name = string("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_87_dilations_0 = const()[name = string("input_87_dilations_0"), val = tensor([1, 1])]; int32 input_87_groups_0 = const()[name = string("input_87_groups_0"), val = int32(1)]; tensor input_87 = conv(dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_palettized, x = input_85)[name = string("input_87")]; string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_palettized, x = input_85)[name = string("b_9")]; tensor c_9 = silu(x = input_87)[name = string("c_9")]; tensor input_89 = mul(x = c_9, y = b_9)[name = string("input_89")]; string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; tensor e_9 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_palettized, x = input_89)[name = string("e_9")]; tensor var_3563_axes_0 = const()[name = string("op_3563_axes_0"), val = tensor([2])]; tensor var_3563 = squeeze(axes = var_3563_axes_0, x = e_9)[name = string("op_3563")]; tensor var_3564 = const()[name = string("op_3564"), val = tensor([0, 2, 1])]; tensor var_3565 = transpose(perm = var_3564, x = var_3563)[name = string("transpose_117")]; tensor hidden_states_31_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_3565)[name = string("hidden_states_31_cast_fp16")]; tensor mean_41_axes_0 = const()[name = string("mean_41_axes_0"), val = tensor([-1])]; bool mean_41_keep_dims_0 = const()[name = string("mean_41_keep_dims_0"), val = bool(true)]; tensor mean_41_cast_fp16 = reduce_mean(axes = mean_41_axes_0, keep_dims = mean_41_keep_dims_0, x = hidden_states_31_cast_fp16)[name = string("mean_41_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_31_cast_fp16, y = mean_41_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_3583_axes_0 = const()[name = string("op_3583_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875874368)))]; fp16 var_3571_to_fp16 = const()[name = string("op_3571_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3583_cast_fp16 = layer_norm(axes = var_3583_axes_0, epsilon = var_3571_to_fp16, gamma = model_model_layers_23_input_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_3583_cast_fp16")]; tensor var_3595 = const()[name = string("op_3595"), val = tensor([0, 2, 1])]; tensor var_3598_axes_0 = const()[name = string("op_3598_axes_0"), val = tensor([2])]; tensor var_3596 = transpose(perm = var_3595, x = var_3583_cast_fp16)[name = string("transpose_116")]; tensor var_3598 = expand_dims(axes = var_3598_axes_0, x = var_3596)[name = string("op_3598")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_3598)[name = string("query_states_41")]; string key_states_51_pad_type_0 = const()[name = string("key_states_51_pad_type_0"), val = string("valid")]; tensor key_states_51_strides_0 = const()[name = string("key_states_51_strides_0"), val = tensor([1, 1])]; tensor key_states_51_pad_0 = const()[name = string("key_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_51_dilations_0 = const()[name = string("key_states_51_dilations_0"), val = tensor([1, 1])]; int32 key_states_51_groups_0 = const()[name = string("key_states_51_groups_0"), val = int32(1)]; tensor key_states_51 = conv(dilations = key_states_51_dilations_0, groups = key_states_51_groups_0, pad = key_states_51_pad_0, pad_type = key_states_51_pad_type_0, strides = key_states_51_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_3598)[name = string("key_states_51")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor value_states_41 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_3598)[name = string("value_states_41")]; tensor var_3640 = const()[name = string("op_3640"), val = tensor([1, 32, 128, 64])]; tensor var_3641 = reshape(shape = var_3640, x = query_states_41)[name = string("op_3641")]; tensor var_3646 = const()[name = string("op_3646"), val = tensor([0, 1, 3, 2])]; tensor var_3651 = const()[name = string("op_3651"), val = tensor([1, 8, 128, 64])]; tensor var_3652 = reshape(shape = var_3651, x = key_states_51)[name = string("op_3652")]; tensor var_3657 = const()[name = string("op_3657"), val = tensor([0, 1, 3, 2])]; tensor var_3662 = const()[name = string("op_3662"), val = tensor([1, 8, 128, 64])]; tensor var_3663 = reshape(shape = var_3662, x = value_states_41)[name = string("op_3663")]; tensor var_3668 = const()[name = string("op_3668"), val = tensor([0, 1, 3, 2])]; tensor mean_43_axes_0 = const()[name = string("mean_43_axes_0"), val = tensor([-1])]; bool mean_43_keep_dims_0 = const()[name = string("mean_43_keep_dims_0"), val = bool(true)]; tensor x_101 = transpose(perm = var_3646, x = var_3641)[name = string("transpose_115")]; tensor mean_43 = reduce_mean(axes = mean_43_axes_0, keep_dims = mean_43_keep_dims_0, x = x_101)[name = string("mean_43")]; tensor input_95 = sub(x = x_101, y = mean_43)[name = string("input_95")]; tensor var_3685_axes_0 = const()[name = string("op_3685_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875879552)))]; fp16 var_3673_to_fp16 = const()[name = string("op_3673_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3685_cast_fp16 = layer_norm(axes = var_3685_axes_0, epsilon = var_3673_to_fp16, gamma = model_model_layers_23_self_attn_q_norm_weight_to_fp16, x = input_95)[name = string("op_3685_cast_fp16")]; tensor mean_45_axes_0 = const()[name = string("mean_45_axes_0"), val = tensor([-1])]; bool mean_45_keep_dims_0 = const()[name = string("mean_45_keep_dims_0"), val = bool(true)]; tensor x_103 = transpose(perm = var_3657, x = var_3652)[name = string("transpose_114")]; tensor mean_45 = reduce_mean(axes = mean_45_axes_0, keep_dims = mean_45_keep_dims_0, x = x_103)[name = string("mean_45")]; tensor input_97 = sub(x = x_103, y = mean_45)[name = string("input_97")]; tensor var_3703_axes_0 = const()[name = string("op_3703_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875879872)))]; fp16 var_3691_to_fp16 = const()[name = string("op_3691_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3703_cast_fp16 = layer_norm(axes = var_3703_axes_0, epsilon = var_3691_to_fp16, gamma = model_model_layers_23_self_attn_k_norm_weight_to_fp16, x = input_97)[name = string("op_3703_cast_fp16")]; tensor var_3718 = mul(x = var_3685_cast_fp16, y = cos_5)[name = string("op_3718")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_3685_cast_fp16)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_3685_cast_fp16)[name = string("x2_21")]; fp16 const_117_promoted = const()[name = string("const_117_promoted"), val = fp16(-0x1p+0)]; tensor var_3739 = mul(x = x2_21, y = const_117_promoted)[name = string("op_3739")]; int32 var_3741 = const()[name = string("op_3741"), val = int32(-1)]; bool var_3742_interleave_0 = const()[name = string("op_3742_interleave_0"), val = bool(false)]; tensor var_3742 = concat(axis = var_3741, interleave = var_3742_interleave_0, values = (var_3739, x1_21))[name = string("op_3742")]; tensor var_3743 = mul(x = var_3742, y = sin_5)[name = string("op_3743")]; tensor query_states_43 = add(x = var_3718, y = var_3743)[name = string("query_states_43")]; tensor var_3746 = mul(x = var_3703_cast_fp16, y = cos_5)[name = string("op_3746")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_3703_cast_fp16)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_3703_cast_fp16)[name = string("x2_23")]; fp16 const_120_promoted = const()[name = string("const_120_promoted"), val = fp16(-0x1p+0)]; tensor var_3767 = mul(x = x2_23, y = const_120_promoted)[name = string("op_3767")]; int32 var_3769 = const()[name = string("op_3769"), val = int32(-1)]; bool var_3770_interleave_0 = const()[name = string("op_3770_interleave_0"), val = bool(false)]; tensor var_3770 = concat(axis = var_3769, interleave = var_3770_interleave_0, values = (var_3767, x1_23))[name = string("op_3770")]; tensor var_3771 = mul(x = var_3770, y = sin_5)[name = string("op_3771")]; tensor key_states_53 = add(x = var_3746, y = var_3771)[name = string("key_states_53")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([23])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([24])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_1233, concat_93_values3_0))[name = string("concat_93")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = key_states_53, x = coreml_update_state_45)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_46")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([59])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([60])]; int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_1233, concat_97_values3_0))[name = string("concat_97")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43 = transpose(perm = var_3668, x = var_3663)[name = string("transpose_113")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_43, x = coreml_update_state_46)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_47 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_47")]; tensor var_3842_begin_0 = const()[name = string("op_3842_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_3842_end_0 = const()[name = string("op_3842_end_0"), val = tensor([24, 8, 1024, 128])]; tensor var_3842_end_mask_0 = const()[name = string("op_3842_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3842_cast_fp16 = slice_by_index(begin = var_3842_begin_0, end = var_3842_end_0, end_mask = var_3842_end_mask_0, x = coreml_update_state_47)[name = string("op_3842_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_3842_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_3849_begin_0 = const()[name = string("op_3849_begin_0"), val = tensor([59, 0, 0, 0])]; tensor var_3849_end_0 = const()[name = string("op_3849_end_0"), val = tensor([60, 8, 1024, 128])]; tensor var_3849_end_mask_0 = const()[name = string("op_3849_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3849_cast_fp16 = slice_by_index(begin = var_3849_begin_0, end = var_3849_end_0, end_mask = var_3849_end_mask_0, x = coreml_update_state_47)[name = string("op_3849_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_3849_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_107_axes_0 = const()[name = string("x_107_axes_0"), val = tensor([1])]; tensor x_107_cast_fp16 = expand_dims(axes = x_107_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_107_cast_fp16")]; tensor var_3878 = const()[name = string("op_3878"), val = tensor([1, 4, 1, 1])]; tensor x_109_cast_fp16 = tile(reps = var_3878, x = x_107_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_3890 = const()[name = string("op_3890"), val = tensor([1, -1, 1024, 128])]; tensor key_states_57_cast_fp16 = reshape(shape = var_3890, x = x_109_cast_fp16)[name = string("key_states_57_cast_fp16")]; tensor x_113_axes_0 = const()[name = string("x_113_axes_0"), val = tensor([1])]; tensor x_113_cast_fp16 = expand_dims(axes = x_113_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_113_cast_fp16")]; tensor var_3898 = const()[name = string("op_3898"), val = tensor([1, 4, 1, 1])]; tensor x_115_cast_fp16 = tile(reps = var_3898, x = x_113_cast_fp16)[name = string("x_115_cast_fp16")]; bool var_3925_transpose_x_0 = const()[name = string("op_3925_transpose_x_0"), val = bool(false)]; bool var_3925_transpose_y_0 = const()[name = string("op_3925_transpose_y_0"), val = bool(true)]; tensor var_3925 = matmul(transpose_x = var_3925_transpose_x_0, transpose_y = var_3925_transpose_y_0, x = query_states_43, y = key_states_57_cast_fp16)[name = string("op_3925")]; fp16 var_3926_to_fp16 = const()[name = string("op_3926_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_21_cast_fp16 = mul(x = var_3925, y = var_3926_to_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor attn_weights_23_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("attn_weights_23_cast_fp16")]; int32 var_3961 = const()[name = string("op_3961"), val = int32(-1)]; tensor var_3963_cast_fp16 = softmax(axis = var_3961, x = attn_weights_23_cast_fp16)[name = string("op_3963_cast_fp16")]; tensor concat_102 = const()[name = string("concat_102"), val = tensor([32, 64, 1024])]; tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_3963_cast_fp16)[name = string("reshape_15_cast_fp16")]; tensor concat_103 = const()[name = string("concat_103"), val = tensor([32, 1024, 128])]; tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_115_cast_fp16)[name = string("reshape_16_cast_fp16")]; bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 32, 64, 128])]; tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor var_3975_perm_0 = const()[name = string("op_3975_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3994 = const()[name = string("op_3994"), val = tensor([1, 64, 4096])]; tensor var_3975_cast_fp16 = transpose(perm = var_3975_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_112")]; tensor attn_output_55_cast_fp16 = reshape(shape = var_3994, x = var_3975_cast_fp16)[name = string("attn_output_55_cast_fp16")]; tensor var_3999 = const()[name = string("op_3999"), val = tensor([0, 2, 1])]; string var_4015_pad_type_0 = const()[name = string("op_4015_pad_type_0"), val = string("valid")]; int32 var_4015_groups_0 = const()[name = string("op_4015_groups_0"), val = int32(1)]; tensor var_4015_strides_0 = const()[name = string("op_4015_strides_0"), val = tensor([1])]; tensor var_4015_pad_0 = const()[name = string("op_4015_pad_0"), val = tensor([0, 0])]; tensor var_4015_dilations_0 = const()[name = string("op_4015_dilations_0"), val = tensor([1])]; tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875880192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881123136))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4000_cast_fp16 = transpose(perm = var_3999, x = attn_output_55_cast_fp16)[name = string("transpose_111")]; tensor var_4015_cast_fp16 = conv(dilations = var_4015_dilations_0, groups = var_4015_groups_0, pad = var_4015_pad_0, pad_type = var_4015_pad_type_0, strides = var_4015_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_4000_cast_fp16)[name = string("op_4015_cast_fp16")]; tensor var_4019 = const()[name = string("op_4019"), val = tensor([0, 2, 1])]; tensor attn_output_59_cast_fp16 = transpose(perm = var_4019, x = var_4015_cast_fp16)[name = string("transpose_110")]; tensor hidden_states_35_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor mean_47_axes_0 = const()[name = string("mean_47_axes_0"), val = tensor([-1])]; bool mean_47_keep_dims_0 = const()[name = string("mean_47_keep_dims_0"), val = bool(true)]; tensor mean_47_cast_fp16 = reduce_mean(axes = mean_47_axes_0, keep_dims = mean_47_keep_dims_0, x = hidden_states_35_cast_fp16)[name = string("mean_47_cast_fp16")]; tensor input_101_cast_fp16 = sub(x = hidden_states_35_cast_fp16, y = mean_47_cast_fp16)[name = string("input_101_cast_fp16")]; tensor var_4038_axes_0 = const()[name = string("op_4038_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881133440)))]; fp16 var_4026_to_fp16 = const()[name = string("op_4026_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4038_cast_fp16 = layer_norm(axes = var_4038_axes_0, epsilon = var_4026_to_fp16, gamma = model_model_layers_23_post_attention_layernorm_weight_to_fp16, x = input_101_cast_fp16)[name = string("op_4038_cast_fp16")]; tensor var_4052 = const()[name = string("op_4052"), val = tensor([0, 2, 1])]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; tensor var_4053 = transpose(perm = var_4052, x = var_4038_cast_fp16)[name = string("transpose_109")]; tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_4053)[name = string("input_103")]; string input_105_pad_type_0 = const()[name = string("input_105_pad_type_0"), val = string("valid")]; tensor input_105_strides_0 = const()[name = string("input_105_strides_0"), val = tensor([1, 1])]; tensor input_105_pad_0 = const()[name = string("input_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_105_dilations_0 = const()[name = string("input_105_dilations_0"), val = tensor([1, 1])]; int32 input_105_groups_0 = const()[name = string("input_105_groups_0"), val = int32(1)]; tensor input_105 = conv(dilations = input_105_dilations_0, groups = input_105_groups_0, pad = input_105_pad_0, pad_type = input_105_pad_type_0, strides = input_105_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_palettized, x = input_103)[name = string("input_105")]; string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_palettized, x = input_103)[name = string("b_11")]; tensor c_11 = silu(x = input_105)[name = string("c_11")]; tensor input_107 = mul(x = c_11, y = b_11)[name = string("input_107")]; string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; tensor e_11 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_palettized, x = input_107)[name = string("e_11")]; tensor var_4075_axes_0 = const()[name = string("op_4075_axes_0"), val = tensor([2])]; tensor var_4075 = squeeze(axes = var_4075_axes_0, x = e_11)[name = string("op_4075")]; tensor var_4076 = const()[name = string("op_4076"), val = tensor([0, 2, 1])]; tensor var_4077 = transpose(perm = var_4076, x = var_4075)[name = string("transpose_108")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = var_4077)[name = string("hidden_states_37_cast_fp16")]; tensor mean_49_axes_0 = const()[name = string("mean_49_axes_0"), val = tensor([-1])]; bool mean_49_keep_dims_0 = const()[name = string("mean_49_keep_dims_0"), val = bool(true)]; tensor mean_49_cast_fp16 = reduce_mean(axes = mean_49_axes_0, keep_dims = mean_49_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_49_cast_fp16")]; tensor input_109_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_49_cast_fp16)[name = string("input_109_cast_fp16")]; tensor var_4095_axes_0 = const()[name = string("op_4095_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881138624)))]; fp16 var_4083_to_fp16 = const()[name = string("op_4083_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4095_cast_fp16 = layer_norm(axes = var_4095_axes_0, epsilon = var_4083_to_fp16, gamma = model_model_layers_24_input_layernorm_weight_to_fp16, x = input_109_cast_fp16)[name = string("op_4095_cast_fp16")]; tensor var_4107 = const()[name = string("op_4107"), val = tensor([0, 2, 1])]; tensor var_4110_axes_0 = const()[name = string("op_4110_axes_0"), val = tensor([2])]; tensor var_4108 = transpose(perm = var_4107, x = var_4095_cast_fp16)[name = string("transpose_107")]; tensor var_4110 = expand_dims(axes = var_4110_axes_0, x = var_4108)[name = string("op_4110")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_4110)[name = string("query_states_49")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_4110)[name = string("key_states_61")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_4110)[name = string("value_states_49")]; tensor var_4152 = const()[name = string("op_4152"), val = tensor([1, 32, 128, 64])]; tensor var_4153 = reshape(shape = var_4152, x = query_states_49)[name = string("op_4153")]; tensor var_4158 = const()[name = string("op_4158"), val = tensor([0, 1, 3, 2])]; tensor var_4163 = const()[name = string("op_4163"), val = tensor([1, 8, 128, 64])]; tensor var_4164 = reshape(shape = var_4163, x = key_states_61)[name = string("op_4164")]; tensor var_4169 = const()[name = string("op_4169"), val = tensor([0, 1, 3, 2])]; tensor var_4174 = const()[name = string("op_4174"), val = tensor([1, 8, 128, 64])]; tensor var_4175 = reshape(shape = var_4174, x = value_states_49)[name = string("op_4175")]; tensor var_4180 = const()[name = string("op_4180"), val = tensor([0, 1, 3, 2])]; tensor mean_51_axes_0 = const()[name = string("mean_51_axes_0"), val = tensor([-1])]; bool mean_51_keep_dims_0 = const()[name = string("mean_51_keep_dims_0"), val = bool(true)]; tensor x_121 = transpose(perm = var_4158, x = var_4153)[name = string("transpose_106")]; tensor mean_51 = reduce_mean(axes = mean_51_axes_0, keep_dims = mean_51_keep_dims_0, x = x_121)[name = string("mean_51")]; tensor input_113 = sub(x = x_121, y = mean_51)[name = string("input_113")]; tensor var_4197_axes_0 = const()[name = string("op_4197_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881143808)))]; fp16 var_4185_to_fp16 = const()[name = string("op_4185_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4197_cast_fp16 = layer_norm(axes = var_4197_axes_0, epsilon = var_4185_to_fp16, gamma = model_model_layers_24_self_attn_q_norm_weight_to_fp16, x = input_113)[name = string("op_4197_cast_fp16")]; tensor mean_53_axes_0 = const()[name = string("mean_53_axes_0"), val = tensor([-1])]; bool mean_53_keep_dims_0 = const()[name = string("mean_53_keep_dims_0"), val = bool(true)]; tensor x_123 = transpose(perm = var_4169, x = var_4164)[name = string("transpose_105")]; tensor mean_53 = reduce_mean(axes = mean_53_axes_0, keep_dims = mean_53_keep_dims_0, x = x_123)[name = string("mean_53")]; tensor input_115 = sub(x = x_123, y = mean_53)[name = string("input_115")]; tensor var_4215_axes_0 = const()[name = string("op_4215_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881144128)))]; fp16 var_4203_to_fp16 = const()[name = string("op_4203_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4215_cast_fp16 = layer_norm(axes = var_4215_axes_0, epsilon = var_4203_to_fp16, gamma = model_model_layers_24_self_attn_k_norm_weight_to_fp16, x = input_115)[name = string("op_4215_cast_fp16")]; tensor var_4230 = mul(x = var_4197_cast_fp16, y = cos_5)[name = string("op_4230")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_4197_cast_fp16)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_4197_cast_fp16)[name = string("x2_25")]; fp16 const_139_promoted = const()[name = string("const_139_promoted"), val = fp16(-0x1p+0)]; tensor var_4251 = mul(x = x2_25, y = const_139_promoted)[name = string("op_4251")]; int32 var_4253 = const()[name = string("op_4253"), val = int32(-1)]; bool var_4254_interleave_0 = const()[name = string("op_4254_interleave_0"), val = bool(false)]; tensor var_4254 = concat(axis = var_4253, interleave = var_4254_interleave_0, values = (var_4251, x1_25))[name = string("op_4254")]; tensor var_4255 = mul(x = var_4254, y = sin_5)[name = string("op_4255")]; tensor query_states_51 = add(x = var_4230, y = var_4255)[name = string("query_states_51")]; tensor var_4258 = mul(x = var_4215_cast_fp16, y = cos_5)[name = string("op_4258")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_4215_cast_fp16)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_4215_cast_fp16)[name = string("x2_27")]; fp16 const_142_promoted = const()[name = string("const_142_promoted"), val = fp16(-0x1p+0)]; tensor var_4279 = mul(x = x2_27, y = const_142_promoted)[name = string("op_4279")]; int32 var_4281 = const()[name = string("op_4281"), val = int32(-1)]; bool var_4282_interleave_0 = const()[name = string("op_4282_interleave_0"), val = bool(false)]; tensor var_4282 = concat(axis = var_4281, interleave = var_4282_interleave_0, values = (var_4279, x1_27))[name = string("op_4282")]; tensor var_4283 = mul(x = var_4282, y = sin_5)[name = string("op_4283")]; tensor key_states_63 = add(x = var_4258, y = var_4283)[name = string("key_states_63")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([24])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([25])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_1233, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = key_states_63, x = coreml_update_state_47)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_48")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([60])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([61])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_1233, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51 = transpose(perm = var_4180, x = var_4175)[name = string("transpose_104")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_51, x = coreml_update_state_48)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_49")]; tensor var_4354_begin_0 = const()[name = string("op_4354_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_4354_end_0 = const()[name = string("op_4354_end_0"), val = tensor([25, 8, 1024, 128])]; tensor var_4354_end_mask_0 = const()[name = string("op_4354_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4354_cast_fp16 = slice_by_index(begin = var_4354_begin_0, end = var_4354_end_0, end_mask = var_4354_end_mask_0, x = coreml_update_state_49)[name = string("op_4354_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_4354_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_4361_begin_0 = const()[name = string("op_4361_begin_0"), val = tensor([60, 0, 0, 0])]; tensor var_4361_end_0 = const()[name = string("op_4361_end_0"), val = tensor([61, 8, 1024, 128])]; tensor var_4361_end_mask_0 = const()[name = string("op_4361_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4361_cast_fp16 = slice_by_index(begin = var_4361_begin_0, end = var_4361_end_0, end_mask = var_4361_end_mask_0, x = coreml_update_state_49)[name = string("op_4361_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_4361_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_127_axes_0 = const()[name = string("x_127_axes_0"), val = tensor([1])]; tensor x_127_cast_fp16 = expand_dims(axes = x_127_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_4390 = const()[name = string("op_4390"), val = tensor([1, 4, 1, 1])]; tensor x_129_cast_fp16 = tile(reps = var_4390, x = x_127_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_4402 = const()[name = string("op_4402"), val = tensor([1, -1, 1024, 128])]; tensor key_states_67_cast_fp16 = reshape(shape = var_4402, x = x_129_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor x_133_axes_0 = const()[name = string("x_133_axes_0"), val = tensor([1])]; tensor x_133_cast_fp16 = expand_dims(axes = x_133_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_4410 = const()[name = string("op_4410"), val = tensor([1, 4, 1, 1])]; tensor x_135_cast_fp16 = tile(reps = var_4410, x = x_133_cast_fp16)[name = string("x_135_cast_fp16")]; bool var_4437_transpose_x_0 = const()[name = string("op_4437_transpose_x_0"), val = bool(false)]; bool var_4437_transpose_y_0 = const()[name = string("op_4437_transpose_y_0"), val = bool(true)]; tensor var_4437 = matmul(transpose_x = var_4437_transpose_x_0, transpose_y = var_4437_transpose_y_0, x = query_states_51, y = key_states_67_cast_fp16)[name = string("op_4437")]; fp16 var_4438_to_fp16 = const()[name = string("op_4438_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_4437, y = var_4438_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_4473 = const()[name = string("op_4473"), val = int32(-1)]; tensor var_4475_cast_fp16 = softmax(axis = var_4473, x = attn_weights_27_cast_fp16)[name = string("op_4475_cast_fp16")]; tensor concat_120 = const()[name = string("concat_120"), val = tensor([32, 64, 1024])]; tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_4475_cast_fp16)[name = string("reshape_18_cast_fp16")]; tensor concat_121 = const()[name = string("concat_121"), val = tensor([32, 1024, 128])]; tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_135_cast_fp16)[name = string("reshape_19_cast_fp16")]; bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 32, 64, 128])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor var_4487_perm_0 = const()[name = string("op_4487_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4506 = const()[name = string("op_4506"), val = tensor([1, 64, 4096])]; tensor var_4487_cast_fp16 = transpose(perm = var_4487_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_103")]; tensor attn_output_65_cast_fp16 = reshape(shape = var_4506, x = var_4487_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_4511 = const()[name = string("op_4511"), val = tensor([0, 2, 1])]; string var_4527_pad_type_0 = const()[name = string("op_4527_pad_type_0"), val = string("valid")]; int32 var_4527_groups_0 = const()[name = string("op_4527_groups_0"), val = int32(1)]; tensor var_4527_strides_0 = const()[name = string("op_4527_strides_0"), val = tensor([1])]; tensor var_4527_pad_0 = const()[name = string("op_4527_pad_0"), val = tensor([0, 0])]; tensor var_4527_dilations_0 = const()[name = string("op_4527_dilations_0"), val = tensor([1])]; tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881144448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886387392))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4512_cast_fp16 = transpose(perm = var_4511, x = attn_output_65_cast_fp16)[name = string("transpose_102")]; tensor var_4527_cast_fp16 = conv(dilations = var_4527_dilations_0, groups = var_4527_groups_0, pad = var_4527_pad_0, pad_type = var_4527_pad_type_0, strides = var_4527_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4512_cast_fp16)[name = string("op_4527_cast_fp16")]; tensor var_4531 = const()[name = string("op_4531"), val = tensor([0, 2, 1])]; tensor attn_output_69_cast_fp16 = transpose(perm = var_4531, x = var_4527_cast_fp16)[name = string("transpose_101")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor mean_55_axes_0 = const()[name = string("mean_55_axes_0"), val = tensor([-1])]; bool mean_55_keep_dims_0 = const()[name = string("mean_55_keep_dims_0"), val = bool(true)]; tensor mean_55_cast_fp16 = reduce_mean(axes = mean_55_axes_0, keep_dims = mean_55_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_55_cast_fp16")]; tensor input_119_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_55_cast_fp16)[name = string("input_119_cast_fp16")]; tensor var_4550_axes_0 = const()[name = string("op_4550_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886397696)))]; fp16 var_4538_to_fp16 = const()[name = string("op_4538_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4550_cast_fp16 = layer_norm(axes = var_4550_axes_0, epsilon = var_4538_to_fp16, gamma = model_model_layers_24_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_4550_cast_fp16")]; tensor var_4564 = const()[name = string("op_4564"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_4565 = transpose(perm = var_4564, x = var_4550_cast_fp16)[name = string("transpose_100")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_4565)[name = string("input_121")]; string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_palettized, x = input_121)[name = string("b_13")]; tensor c_13 = silu(x = input_123)[name = string("c_13")]; tensor input_125 = mul(x = c_13, y = b_13)[name = string("input_125")]; string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; tensor e_13 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_palettized, x = input_125)[name = string("e_13")]; tensor var_4587_axes_0 = const()[name = string("op_4587_axes_0"), val = tensor([2])]; tensor var_4587 = squeeze(axes = var_4587_axes_0, x = e_13)[name = string("op_4587")]; tensor var_4588 = const()[name = string("op_4588"), val = tensor([0, 2, 1])]; tensor var_4589 = transpose(perm = var_4588, x = var_4587)[name = string("transpose_99")]; tensor hidden_states_43_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = var_4589)[name = string("hidden_states_43_cast_fp16")]; tensor mean_57_axes_0 = const()[name = string("mean_57_axes_0"), val = tensor([-1])]; bool mean_57_keep_dims_0 = const()[name = string("mean_57_keep_dims_0"), val = bool(true)]; tensor mean_57_cast_fp16 = reduce_mean(axes = mean_57_axes_0, keep_dims = mean_57_keep_dims_0, x = hidden_states_43_cast_fp16)[name = string("mean_57_cast_fp16")]; tensor input_127_cast_fp16 = sub(x = hidden_states_43_cast_fp16, y = mean_57_cast_fp16)[name = string("input_127_cast_fp16")]; tensor var_4607_axes_0 = const()[name = string("op_4607_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886402880)))]; fp16 var_4595_to_fp16 = const()[name = string("op_4595_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4607_cast_fp16 = layer_norm(axes = var_4607_axes_0, epsilon = var_4595_to_fp16, gamma = model_model_layers_25_input_layernorm_weight_to_fp16, x = input_127_cast_fp16)[name = string("op_4607_cast_fp16")]; tensor var_4619 = const()[name = string("op_4619"), val = tensor([0, 2, 1])]; tensor var_4622_axes_0 = const()[name = string("op_4622_axes_0"), val = tensor([2])]; tensor var_4620 = transpose(perm = var_4619, x = var_4607_cast_fp16)[name = string("transpose_98")]; tensor var_4622 = expand_dims(axes = var_4622_axes_0, x = var_4620)[name = string("op_4622")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_4622)[name = string("query_states_57")]; string key_states_71_pad_type_0 = const()[name = string("key_states_71_pad_type_0"), val = string("valid")]; tensor key_states_71_strides_0 = const()[name = string("key_states_71_strides_0"), val = tensor([1, 1])]; tensor key_states_71_pad_0 = const()[name = string("key_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_71_dilations_0 = const()[name = string("key_states_71_dilations_0"), val = tensor([1, 1])]; int32 key_states_71_groups_0 = const()[name = string("key_states_71_groups_0"), val = int32(1)]; tensor key_states_71 = conv(dilations = key_states_71_dilations_0, groups = key_states_71_groups_0, pad = key_states_71_pad_0, pad_type = key_states_71_pad_type_0, strides = key_states_71_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_4622)[name = string("key_states_71")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor value_states_57 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_4622)[name = string("value_states_57")]; tensor var_4664 = const()[name = string("op_4664"), val = tensor([1, 32, 128, 64])]; tensor var_4665 = reshape(shape = var_4664, x = query_states_57)[name = string("op_4665")]; tensor var_4670 = const()[name = string("op_4670"), val = tensor([0, 1, 3, 2])]; tensor var_4675 = const()[name = string("op_4675"), val = tensor([1, 8, 128, 64])]; tensor var_4676 = reshape(shape = var_4675, x = key_states_71)[name = string("op_4676")]; tensor var_4681 = const()[name = string("op_4681"), val = tensor([0, 1, 3, 2])]; tensor var_4686 = const()[name = string("op_4686"), val = tensor([1, 8, 128, 64])]; tensor var_4687 = reshape(shape = var_4686, x = value_states_57)[name = string("op_4687")]; tensor var_4692 = const()[name = string("op_4692"), val = tensor([0, 1, 3, 2])]; tensor mean_59_axes_0 = const()[name = string("mean_59_axes_0"), val = tensor([-1])]; bool mean_59_keep_dims_0 = const()[name = string("mean_59_keep_dims_0"), val = bool(true)]; tensor x_141 = transpose(perm = var_4670, x = var_4665)[name = string("transpose_97")]; tensor mean_59 = reduce_mean(axes = mean_59_axes_0, keep_dims = mean_59_keep_dims_0, x = x_141)[name = string("mean_59")]; tensor input_131 = sub(x = x_141, y = mean_59)[name = string("input_131")]; tensor var_4709_axes_0 = const()[name = string("op_4709_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408064)))]; fp16 var_4697_to_fp16 = const()[name = string("op_4697_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4709_cast_fp16 = layer_norm(axes = var_4709_axes_0, epsilon = var_4697_to_fp16, gamma = model_model_layers_25_self_attn_q_norm_weight_to_fp16, x = input_131)[name = string("op_4709_cast_fp16")]; tensor mean_61_axes_0 = const()[name = string("mean_61_axes_0"), val = tensor([-1])]; bool mean_61_keep_dims_0 = const()[name = string("mean_61_keep_dims_0"), val = bool(true)]; tensor x_143 = transpose(perm = var_4681, x = var_4676)[name = string("transpose_96")]; tensor mean_61 = reduce_mean(axes = mean_61_axes_0, keep_dims = mean_61_keep_dims_0, x = x_143)[name = string("mean_61")]; tensor input_133 = sub(x = x_143, y = mean_61)[name = string("input_133")]; tensor var_4727_axes_0 = const()[name = string("op_4727_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408384)))]; fp16 var_4715_to_fp16 = const()[name = string("op_4715_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4727_cast_fp16 = layer_norm(axes = var_4727_axes_0, epsilon = var_4715_to_fp16, gamma = model_model_layers_25_self_attn_k_norm_weight_to_fp16, x = input_133)[name = string("op_4727_cast_fp16")]; tensor var_4742 = mul(x = var_4709_cast_fp16, y = cos_5)[name = string("op_4742")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_4709_cast_fp16)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_4709_cast_fp16)[name = string("x2_29")]; fp16 const_161_promoted = const()[name = string("const_161_promoted"), val = fp16(-0x1p+0)]; tensor var_4763 = mul(x = x2_29, y = const_161_promoted)[name = string("op_4763")]; int32 var_4765 = const()[name = string("op_4765"), val = int32(-1)]; bool var_4766_interleave_0 = const()[name = string("op_4766_interleave_0"), val = bool(false)]; tensor var_4766 = concat(axis = var_4765, interleave = var_4766_interleave_0, values = (var_4763, x1_29))[name = string("op_4766")]; tensor var_4767 = mul(x = var_4766, y = sin_5)[name = string("op_4767")]; tensor query_states_59 = add(x = var_4742, y = var_4767)[name = string("query_states_59")]; tensor var_4770 = mul(x = var_4727_cast_fp16, y = cos_5)[name = string("op_4770")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_4727_cast_fp16)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_4727_cast_fp16)[name = string("x2_31")]; fp16 const_164_promoted = const()[name = string("const_164_promoted"), val = fp16(-0x1p+0)]; tensor var_4791 = mul(x = x2_31, y = const_164_promoted)[name = string("op_4791")]; int32 var_4793 = const()[name = string("op_4793"), val = int32(-1)]; bool var_4794_interleave_0 = const()[name = string("op_4794_interleave_0"), val = bool(false)]; tensor var_4794 = concat(axis = var_4793, interleave = var_4794_interleave_0, values = (var_4791, x1_31))[name = string("op_4794")]; tensor var_4795 = mul(x = var_4794, y = sin_5)[name = string("op_4795")]; tensor key_states_73 = add(x = var_4770, y = var_4795)[name = string("key_states_73")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([25])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([26])]; int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_1233, concat_129_values3_0))[name = string("concat_129")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = key_states_73, x = coreml_update_state_49)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_50")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([61])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([62])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_1233, concat_133_values3_0))[name = string("concat_133")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59 = transpose(perm = var_4692, x = var_4687)[name = string("transpose_95")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_59, x = coreml_update_state_50)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_51")]; tensor var_4866_begin_0 = const()[name = string("op_4866_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_4866_end_0 = const()[name = string("op_4866_end_0"), val = tensor([26, 8, 1024, 128])]; tensor var_4866_end_mask_0 = const()[name = string("op_4866_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4866_cast_fp16 = slice_by_index(begin = var_4866_begin_0, end = var_4866_end_0, end_mask = var_4866_end_mask_0, x = coreml_update_state_51)[name = string("op_4866_cast_fp16")]; tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_4866_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; tensor var_4873_begin_0 = const()[name = string("op_4873_begin_0"), val = tensor([61, 0, 0, 0])]; tensor var_4873_end_0 = const()[name = string("op_4873_end_0"), val = tensor([62, 8, 1024, 128])]; tensor var_4873_end_mask_0 = const()[name = string("op_4873_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4873_cast_fp16 = slice_by_index(begin = var_4873_begin_0, end = var_4873_end_0, end_mask = var_4873_end_mask_0, x = coreml_update_state_51)[name = string("op_4873_cast_fp16")]; tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_4873_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; tensor x_147_axes_0 = const()[name = string("x_147_axes_0"), val = tensor([1])]; tensor x_147_cast_fp16 = expand_dims(axes = x_147_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_147_cast_fp16")]; tensor var_4902 = const()[name = string("op_4902"), val = tensor([1, 4, 1, 1])]; tensor x_149_cast_fp16 = tile(reps = var_4902, x = x_147_cast_fp16)[name = string("x_149_cast_fp16")]; tensor var_4914 = const()[name = string("op_4914"), val = tensor([1, -1, 1024, 128])]; tensor key_states_77_cast_fp16 = reshape(shape = var_4914, x = x_149_cast_fp16)[name = string("key_states_77_cast_fp16")]; tensor x_153_axes_0 = const()[name = string("x_153_axes_0"), val = tensor([1])]; tensor x_153_cast_fp16 = expand_dims(axes = x_153_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_4922 = const()[name = string("op_4922"), val = tensor([1, 4, 1, 1])]; tensor x_155_cast_fp16 = tile(reps = var_4922, x = x_153_cast_fp16)[name = string("x_155_cast_fp16")]; bool var_4949_transpose_x_0 = const()[name = string("op_4949_transpose_x_0"), val = bool(false)]; bool var_4949_transpose_y_0 = const()[name = string("op_4949_transpose_y_0"), val = bool(true)]; tensor var_4949 = matmul(transpose_x = var_4949_transpose_x_0, transpose_y = var_4949_transpose_y_0, x = query_states_59, y = key_states_77_cast_fp16)[name = string("op_4949")]; fp16 var_4950_to_fp16 = const()[name = string("op_4950_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_29_cast_fp16 = mul(x = var_4949, y = var_4950_to_fp16)[name = string("attn_weights_29_cast_fp16")]; tensor attn_weights_31_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("attn_weights_31_cast_fp16")]; int32 var_4985 = const()[name = string("op_4985"), val = int32(-1)]; tensor var_4987_cast_fp16 = softmax(axis = var_4985, x = attn_weights_31_cast_fp16)[name = string("op_4987_cast_fp16")]; tensor concat_138 = const()[name = string("concat_138"), val = tensor([32, 64, 1024])]; tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_4987_cast_fp16)[name = string("reshape_21_cast_fp16")]; tensor concat_139 = const()[name = string("concat_139"), val = tensor([32, 1024, 128])]; tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_155_cast_fp16)[name = string("reshape_22_cast_fp16")]; bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 32, 64, 128])]; tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor var_4999_perm_0 = const()[name = string("op_4999_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5018 = const()[name = string("op_5018"), val = tensor([1, 64, 4096])]; tensor var_4999_cast_fp16 = transpose(perm = var_4999_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_94")]; tensor attn_output_75_cast_fp16 = reshape(shape = var_5018, x = var_4999_cast_fp16)[name = string("attn_output_75_cast_fp16")]; tensor var_5023 = const()[name = string("op_5023"), val = tensor([0, 2, 1])]; string var_5039_pad_type_0 = const()[name = string("op_5039_pad_type_0"), val = string("valid")]; int32 var_5039_groups_0 = const()[name = string("op_5039_groups_0"), val = int32(1)]; tensor var_5039_strides_0 = const()[name = string("op_5039_strides_0"), val = tensor([1])]; tensor var_5039_pad_0 = const()[name = string("op_5039_pad_0"), val = tensor([0, 0])]; tensor var_5039_dilations_0 = const()[name = string("op_5039_dilations_0"), val = tensor([1])]; tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886408704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891651648))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5024_cast_fp16 = transpose(perm = var_5023, x = attn_output_75_cast_fp16)[name = string("transpose_93")]; tensor var_5039_cast_fp16 = conv(dilations = var_5039_dilations_0, groups = var_5039_groups_0, pad = var_5039_pad_0, pad_type = var_5039_pad_type_0, strides = var_5039_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5024_cast_fp16)[name = string("op_5039_cast_fp16")]; tensor var_5043 = const()[name = string("op_5043"), val = tensor([0, 2, 1])]; tensor attn_output_79_cast_fp16 = transpose(perm = var_5043, x = var_5039_cast_fp16)[name = string("transpose_92")]; tensor hidden_states_47_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor mean_63_axes_0 = const()[name = string("mean_63_axes_0"), val = tensor([-1])]; bool mean_63_keep_dims_0 = const()[name = string("mean_63_keep_dims_0"), val = bool(true)]; tensor mean_63_cast_fp16 = reduce_mean(axes = mean_63_axes_0, keep_dims = mean_63_keep_dims_0, x = hidden_states_47_cast_fp16)[name = string("mean_63_cast_fp16")]; tensor input_137_cast_fp16 = sub(x = hidden_states_47_cast_fp16, y = mean_63_cast_fp16)[name = string("input_137_cast_fp16")]; tensor var_5062_axes_0 = const()[name = string("op_5062_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891661952)))]; fp16 var_5050_to_fp16 = const()[name = string("op_5050_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5062_cast_fp16 = layer_norm(axes = var_5062_axes_0, epsilon = var_5050_to_fp16, gamma = model_model_layers_25_post_attention_layernorm_weight_to_fp16, x = input_137_cast_fp16)[name = string("op_5062_cast_fp16")]; tensor var_5076 = const()[name = string("op_5076"), val = tensor([0, 2, 1])]; tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; tensor var_5077 = transpose(perm = var_5076, x = var_5062_cast_fp16)[name = string("transpose_91")]; tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_5077)[name = string("input_139")]; string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; tensor input_141 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_palettized, x = input_139)[name = string("input_141")]; string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_palettized, x = input_139)[name = string("b_15")]; tensor c_15 = silu(x = input_141)[name = string("c_15")]; tensor input_143 = mul(x = c_15, y = b_15)[name = string("input_143")]; string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; tensor e_15 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_palettized, x = input_143)[name = string("e_15")]; tensor var_5099_axes_0 = const()[name = string("op_5099_axes_0"), val = tensor([2])]; tensor var_5099 = squeeze(axes = var_5099_axes_0, x = e_15)[name = string("op_5099")]; tensor var_5100 = const()[name = string("op_5100"), val = tensor([0, 2, 1])]; tensor var_5101 = transpose(perm = var_5100, x = var_5099)[name = string("transpose_90")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = var_5101)[name = string("hidden_states_49_cast_fp16")]; tensor mean_65_axes_0 = const()[name = string("mean_65_axes_0"), val = tensor([-1])]; bool mean_65_keep_dims_0 = const()[name = string("mean_65_keep_dims_0"), val = bool(true)]; tensor mean_65_cast_fp16 = reduce_mean(axes = mean_65_axes_0, keep_dims = mean_65_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_65_cast_fp16")]; tensor input_145_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_65_cast_fp16)[name = string("input_145_cast_fp16")]; tensor var_5119_axes_0 = const()[name = string("op_5119_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891667136)))]; fp16 var_5107_to_fp16 = const()[name = string("op_5107_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5119_cast_fp16 = layer_norm(axes = var_5119_axes_0, epsilon = var_5107_to_fp16, gamma = model_model_layers_26_input_layernorm_weight_to_fp16, x = input_145_cast_fp16)[name = string("op_5119_cast_fp16")]; tensor var_5131 = const()[name = string("op_5131"), val = tensor([0, 2, 1])]; tensor var_5134_axes_0 = const()[name = string("op_5134_axes_0"), val = tensor([2])]; tensor var_5132 = transpose(perm = var_5131, x = var_5119_cast_fp16)[name = string("transpose_89")]; tensor var_5134 = expand_dims(axes = var_5134_axes_0, x = var_5132)[name = string("op_5134")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor query_states_65 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = model_model_layers_26_self_attn_q_proj_weight_palettized, x = var_5134)[name = string("query_states_65")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor key_states_81 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = model_model_layers_26_self_attn_k_proj_weight_palettized, x = var_5134)[name = string("key_states_81")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor value_states_65 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = model_model_layers_26_self_attn_v_proj_weight_palettized, x = var_5134)[name = string("value_states_65")]; tensor var_5176 = const()[name = string("op_5176"), val = tensor([1, 32, 128, 64])]; tensor var_5177 = reshape(shape = var_5176, x = query_states_65)[name = string("op_5177")]; tensor var_5182 = const()[name = string("op_5182"), val = tensor([0, 1, 3, 2])]; tensor var_5187 = const()[name = string("op_5187"), val = tensor([1, 8, 128, 64])]; tensor var_5188 = reshape(shape = var_5187, x = key_states_81)[name = string("op_5188")]; tensor var_5193 = const()[name = string("op_5193"), val = tensor([0, 1, 3, 2])]; tensor var_5198 = const()[name = string("op_5198"), val = tensor([1, 8, 128, 64])]; tensor var_5199 = reshape(shape = var_5198, x = value_states_65)[name = string("op_5199")]; tensor var_5204 = const()[name = string("op_5204"), val = tensor([0, 1, 3, 2])]; tensor mean_67_axes_0 = const()[name = string("mean_67_axes_0"), val = tensor([-1])]; bool mean_67_keep_dims_0 = const()[name = string("mean_67_keep_dims_0"), val = bool(true)]; tensor x_161 = transpose(perm = var_5182, x = var_5177)[name = string("transpose_88")]; tensor mean_67 = reduce_mean(axes = mean_67_axes_0, keep_dims = mean_67_keep_dims_0, x = x_161)[name = string("mean_67")]; tensor input_149 = sub(x = x_161, y = mean_67)[name = string("input_149")]; tensor var_5221_axes_0 = const()[name = string("op_5221_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672320)))]; fp16 var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5221_cast_fp16 = layer_norm(axes = var_5221_axes_0, epsilon = var_5209_to_fp16, gamma = model_model_layers_26_self_attn_q_norm_weight_to_fp16, x = input_149)[name = string("op_5221_cast_fp16")]; tensor mean_69_axes_0 = const()[name = string("mean_69_axes_0"), val = tensor([-1])]; bool mean_69_keep_dims_0 = const()[name = string("mean_69_keep_dims_0"), val = bool(true)]; tensor x_163 = transpose(perm = var_5193, x = var_5188)[name = string("transpose_87")]; tensor mean_69 = reduce_mean(axes = mean_69_axes_0, keep_dims = mean_69_keep_dims_0, x = x_163)[name = string("mean_69")]; tensor input_151 = sub(x = x_163, y = mean_69)[name = string("input_151")]; tensor var_5239_axes_0 = const()[name = string("op_5239_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672640)))]; fp16 var_5227_to_fp16 = const()[name = string("op_5227_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5239_cast_fp16 = layer_norm(axes = var_5239_axes_0, epsilon = var_5227_to_fp16, gamma = model_model_layers_26_self_attn_k_norm_weight_to_fp16, x = input_151)[name = string("op_5239_cast_fp16")]; tensor var_5254 = mul(x = var_5221_cast_fp16, y = cos_5)[name = string("op_5254")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_5221_cast_fp16)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_5221_cast_fp16)[name = string("x2_33")]; fp16 const_183_promoted = const()[name = string("const_183_promoted"), val = fp16(-0x1p+0)]; tensor var_5275 = mul(x = x2_33, y = const_183_promoted)[name = string("op_5275")]; int32 var_5277 = const()[name = string("op_5277"), val = int32(-1)]; bool var_5278_interleave_0 = const()[name = string("op_5278_interleave_0"), val = bool(false)]; tensor var_5278 = concat(axis = var_5277, interleave = var_5278_interleave_0, values = (var_5275, x1_33))[name = string("op_5278")]; tensor var_5279 = mul(x = var_5278, y = sin_5)[name = string("op_5279")]; tensor query_states_67 = add(x = var_5254, y = var_5279)[name = string("query_states_67")]; tensor var_5282 = mul(x = var_5239_cast_fp16, y = cos_5)[name = string("op_5282")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_5239_cast_fp16)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_5239_cast_fp16)[name = string("x2_35")]; fp16 const_186_promoted = const()[name = string("const_186_promoted"), val = fp16(-0x1p+0)]; tensor var_5303 = mul(x = x2_35, y = const_186_promoted)[name = string("op_5303")]; int32 var_5305 = const()[name = string("op_5305"), val = int32(-1)]; bool var_5306_interleave_0 = const()[name = string("op_5306_interleave_0"), val = bool(false)]; tensor var_5306 = concat(axis = var_5305, interleave = var_5306_interleave_0, values = (var_5303, x1_35))[name = string("op_5306")]; tensor var_5307 = mul(x = var_5306, y = sin_5)[name = string("op_5307")]; tensor key_states_83 = add(x = var_5282, y = var_5307)[name = string("key_states_83")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([26])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([27])]; int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, var_1233, concat_147_values3_0))[name = string("concat_147")]; tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = key_states_83, x = coreml_update_state_51)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_52")]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([62])]; tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([63])]; int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, var_1233, concat_151_values3_0))[name = string("concat_151")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67 = transpose(perm = var_5204, x = var_5199)[name = string("transpose_86")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = value_states_67, x = coreml_update_state_52)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_53")]; tensor var_5378_begin_0 = const()[name = string("op_5378_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_5378_end_0 = const()[name = string("op_5378_end_0"), val = tensor([27, 8, 1024, 128])]; tensor var_5378_end_mask_0 = const()[name = string("op_5378_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5378_cast_fp16 = slice_by_index(begin = var_5378_begin_0, end = var_5378_end_0, end_mask = var_5378_end_mask_0, x = coreml_update_state_53)[name = string("op_5378_cast_fp16")]; tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_5378_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; tensor var_5385_begin_0 = const()[name = string("op_5385_begin_0"), val = tensor([62, 0, 0, 0])]; tensor var_5385_end_0 = const()[name = string("op_5385_end_0"), val = tensor([63, 8, 1024, 128])]; tensor var_5385_end_mask_0 = const()[name = string("op_5385_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5385_cast_fp16 = slice_by_index(begin = var_5385_begin_0, end = var_5385_end_0, end_mask = var_5385_end_mask_0, x = coreml_update_state_53)[name = string("op_5385_cast_fp16")]; tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_5385_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; tensor x_167_axes_0 = const()[name = string("x_167_axes_0"), val = tensor([1])]; tensor x_167_cast_fp16 = expand_dims(axes = x_167_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_167_cast_fp16")]; tensor var_5414 = const()[name = string("op_5414"), val = tensor([1, 4, 1, 1])]; tensor x_169_cast_fp16 = tile(reps = var_5414, x = x_167_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_5426 = const()[name = string("op_5426"), val = tensor([1, -1, 1024, 128])]; tensor key_states_87_cast_fp16 = reshape(shape = var_5426, x = x_169_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor x_173_axes_0 = const()[name = string("x_173_axes_0"), val = tensor([1])]; tensor x_173_cast_fp16 = expand_dims(axes = x_173_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_173_cast_fp16")]; tensor var_5434 = const()[name = string("op_5434"), val = tensor([1, 4, 1, 1])]; tensor x_175_cast_fp16 = tile(reps = var_5434, x = x_173_cast_fp16)[name = string("x_175_cast_fp16")]; bool var_5461_transpose_x_0 = const()[name = string("op_5461_transpose_x_0"), val = bool(false)]; bool var_5461_transpose_y_0 = const()[name = string("op_5461_transpose_y_0"), val = bool(true)]; tensor var_5461 = matmul(transpose_x = var_5461_transpose_x_0, transpose_y = var_5461_transpose_y_0, x = query_states_67, y = key_states_87_cast_fp16)[name = string("op_5461")]; fp16 var_5462_to_fp16 = const()[name = string("op_5462_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_33_cast_fp16 = mul(x = var_5461, y = var_5462_to_fp16)[name = string("attn_weights_33_cast_fp16")]; tensor attn_weights_35_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("attn_weights_35_cast_fp16")]; int32 var_5497 = const()[name = string("op_5497"), val = int32(-1)]; tensor var_5499_cast_fp16 = softmax(axis = var_5497, x = attn_weights_35_cast_fp16)[name = string("op_5499_cast_fp16")]; tensor concat_156 = const()[name = string("concat_156"), val = tensor([32, 64, 1024])]; tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_5499_cast_fp16)[name = string("reshape_24_cast_fp16")]; tensor concat_157 = const()[name = string("concat_157"), val = tensor([32, 1024, 128])]; tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_175_cast_fp16)[name = string("reshape_25_cast_fp16")]; bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 32, 64, 128])]; tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; tensor var_5511_perm_0 = const()[name = string("op_5511_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5530 = const()[name = string("op_5530"), val = tensor([1, 64, 4096])]; tensor var_5511_cast_fp16 = transpose(perm = var_5511_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_85")]; tensor attn_output_85_cast_fp16 = reshape(shape = var_5530, x = var_5511_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_5535 = const()[name = string("op_5535"), val = tensor([0, 2, 1])]; string var_5551_pad_type_0 = const()[name = string("op_5551_pad_type_0"), val = string("valid")]; int32 var_5551_groups_0 = const()[name = string("op_5551_groups_0"), val = int32(1)]; tensor var_5551_strides_0 = const()[name = string("op_5551_strides_0"), val = tensor([1])]; tensor var_5551_pad_0 = const()[name = string("op_5551_pad_0"), val = tensor([0, 0])]; tensor var_5551_dilations_0 = const()[name = string("op_5551_dilations_0"), val = tensor([1])]; tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891672960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896915904))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5536_cast_fp16 = transpose(perm = var_5535, x = attn_output_85_cast_fp16)[name = string("transpose_84")]; tensor var_5551_cast_fp16 = conv(dilations = var_5551_dilations_0, groups = var_5551_groups_0, pad = var_5551_pad_0, pad_type = var_5551_pad_type_0, strides = var_5551_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5536_cast_fp16)[name = string("op_5551_cast_fp16")]; tensor var_5555 = const()[name = string("op_5555"), val = tensor([0, 2, 1])]; tensor attn_output_89_cast_fp16 = transpose(perm = var_5555, x = var_5551_cast_fp16)[name = string("transpose_83")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_71_axes_0 = const()[name = string("mean_71_axes_0"), val = tensor([-1])]; bool mean_71_keep_dims_0 = const()[name = string("mean_71_keep_dims_0"), val = bool(true)]; tensor mean_71_cast_fp16 = reduce_mean(axes = mean_71_axes_0, keep_dims = mean_71_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_71_cast_fp16")]; tensor input_155_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_71_cast_fp16)[name = string("input_155_cast_fp16")]; tensor var_5574_axes_0 = const()[name = string("op_5574_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896926208)))]; fp16 var_5562_to_fp16 = const()[name = string("op_5562_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5574_cast_fp16 = layer_norm(axes = var_5574_axes_0, epsilon = var_5562_to_fp16, gamma = model_model_layers_26_post_attention_layernorm_weight_to_fp16, x = input_155_cast_fp16)[name = string("op_5574_cast_fp16")]; tensor var_5588 = const()[name = string("op_5588"), val = tensor([0, 2, 1])]; tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; tensor var_5589 = transpose(perm = var_5588, x = var_5574_cast_fp16)[name = string("transpose_82")]; tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_5589)[name = string("input_157")]; string input_159_pad_type_0 = const()[name = string("input_159_pad_type_0"), val = string("valid")]; tensor input_159_strides_0 = const()[name = string("input_159_strides_0"), val = tensor([1, 1])]; tensor input_159_pad_0 = const()[name = string("input_159_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_159_dilations_0 = const()[name = string("input_159_dilations_0"), val = tensor([1, 1])]; int32 input_159_groups_0 = const()[name = string("input_159_groups_0"), val = int32(1)]; tensor input_159 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = model_model_layers_26_mlp_gate_proj_weight_palettized, x = input_157)[name = string("input_159")]; string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_26_mlp_up_proj_weight_palettized, x = input_157)[name = string("b_17")]; tensor c_17 = silu(x = input_159)[name = string("c_17")]; tensor input_161 = mul(x = c_17, y = b_17)[name = string("input_161")]; string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; tensor e_17 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_26_mlp_down_proj_weight_palettized, x = input_161)[name = string("e_17")]; tensor var_5611_axes_0 = const()[name = string("op_5611_axes_0"), val = tensor([2])]; tensor var_5611 = squeeze(axes = var_5611_axes_0, x = e_17)[name = string("op_5611")]; tensor var_5612 = const()[name = string("op_5612"), val = tensor([0, 2, 1])]; tensor var_5613 = transpose(perm = var_5612, x = var_5611)[name = string("transpose_81")]; tensor hidden_states_55_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_5613)[name = string("hidden_states_55_cast_fp16")]; tensor mean_73_axes_0 = const()[name = string("mean_73_axes_0"), val = tensor([-1])]; bool mean_73_keep_dims_0 = const()[name = string("mean_73_keep_dims_0"), val = bool(true)]; tensor mean_73_cast_fp16 = reduce_mean(axes = mean_73_axes_0, keep_dims = mean_73_keep_dims_0, x = hidden_states_55_cast_fp16)[name = string("mean_73_cast_fp16")]; tensor input_163_cast_fp16 = sub(x = hidden_states_55_cast_fp16, y = mean_73_cast_fp16)[name = string("input_163_cast_fp16")]; tensor var_5631_axes_0 = const()[name = string("op_5631_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896931392)))]; fp16 var_5619_to_fp16 = const()[name = string("op_5619_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5631_cast_fp16 = layer_norm(axes = var_5631_axes_0, epsilon = var_5619_to_fp16, gamma = model_model_layers_27_input_layernorm_weight_to_fp16, x = input_163_cast_fp16)[name = string("op_5631_cast_fp16")]; tensor var_5643 = const()[name = string("op_5643"), val = tensor([0, 2, 1])]; tensor var_5646_axes_0 = const()[name = string("op_5646_axes_0"), val = tensor([2])]; tensor var_5644 = transpose(perm = var_5643, x = var_5631_cast_fp16)[name = string("transpose_80")]; tensor var_5646 = expand_dims(axes = var_5646_axes_0, x = var_5644)[name = string("op_5646")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor query_states_73 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = model_model_layers_27_self_attn_q_proj_weight_palettized, x = var_5646)[name = string("query_states_73")]; string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_27_self_attn_k_proj_weight_palettized, x = var_5646)[name = string("key_states_91")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_27_self_attn_v_proj_weight_palettized, x = var_5646)[name = string("value_states_73")]; tensor var_5688 = const()[name = string("op_5688"), val = tensor([1, 32, 128, 64])]; tensor var_5689 = reshape(shape = var_5688, x = query_states_73)[name = string("op_5689")]; tensor var_5694 = const()[name = string("op_5694"), val = tensor([0, 1, 3, 2])]; tensor var_5699 = const()[name = string("op_5699"), val = tensor([1, 8, 128, 64])]; tensor var_5700 = reshape(shape = var_5699, x = key_states_91)[name = string("op_5700")]; tensor var_5705 = const()[name = string("op_5705"), val = tensor([0, 1, 3, 2])]; tensor var_5710 = const()[name = string("op_5710"), val = tensor([1, 8, 128, 64])]; tensor var_5711 = reshape(shape = var_5710, x = value_states_73)[name = string("op_5711")]; tensor var_5716 = const()[name = string("op_5716"), val = tensor([0, 1, 3, 2])]; tensor mean_75_axes_0 = const()[name = string("mean_75_axes_0"), val = tensor([-1])]; bool mean_75_keep_dims_0 = const()[name = string("mean_75_keep_dims_0"), val = bool(true)]; tensor x_181 = transpose(perm = var_5694, x = var_5689)[name = string("transpose_79")]; tensor mean_75 = reduce_mean(axes = mean_75_axes_0, keep_dims = mean_75_keep_dims_0, x = x_181)[name = string("mean_75")]; tensor input_167 = sub(x = x_181, y = mean_75)[name = string("input_167")]; tensor var_5733_axes_0 = const()[name = string("op_5733_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896936576)))]; fp16 var_5721_to_fp16 = const()[name = string("op_5721_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5733_cast_fp16 = layer_norm(axes = var_5733_axes_0, epsilon = var_5721_to_fp16, gamma = model_model_layers_27_self_attn_q_norm_weight_to_fp16, x = input_167)[name = string("op_5733_cast_fp16")]; tensor mean_77_axes_0 = const()[name = string("mean_77_axes_0"), val = tensor([-1])]; bool mean_77_keep_dims_0 = const()[name = string("mean_77_keep_dims_0"), val = bool(true)]; tensor x_183 = transpose(perm = var_5705, x = var_5700)[name = string("transpose_78")]; tensor mean_77 = reduce_mean(axes = mean_77_axes_0, keep_dims = mean_77_keep_dims_0, x = x_183)[name = string("mean_77")]; tensor input_169 = sub(x = x_183, y = mean_77)[name = string("input_169")]; tensor var_5751_axes_0 = const()[name = string("op_5751_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896936896)))]; fp16 var_5739_to_fp16 = const()[name = string("op_5739_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5751_cast_fp16 = layer_norm(axes = var_5751_axes_0, epsilon = var_5739_to_fp16, gamma = model_model_layers_27_self_attn_k_norm_weight_to_fp16, x = input_169)[name = string("op_5751_cast_fp16")]; tensor var_5766 = mul(x = var_5733_cast_fp16, y = cos_5)[name = string("op_5766")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_5733_cast_fp16)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_5733_cast_fp16)[name = string("x2_37")]; fp16 const_205_promoted = const()[name = string("const_205_promoted"), val = fp16(-0x1p+0)]; tensor var_5787 = mul(x = x2_37, y = const_205_promoted)[name = string("op_5787")]; int32 var_5789 = const()[name = string("op_5789"), val = int32(-1)]; bool var_5790_interleave_0 = const()[name = string("op_5790_interleave_0"), val = bool(false)]; tensor var_5790 = concat(axis = var_5789, interleave = var_5790_interleave_0, values = (var_5787, x1_37))[name = string("op_5790")]; tensor var_5791 = mul(x = var_5790, y = sin_5)[name = string("op_5791")]; tensor query_states_75 = add(x = var_5766, y = var_5791)[name = string("query_states_75")]; tensor var_5794 = mul(x = var_5751_cast_fp16, y = cos_5)[name = string("op_5794")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_5751_cast_fp16)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_5751_cast_fp16)[name = string("x2_39")]; fp16 const_208_promoted = const()[name = string("const_208_promoted"), val = fp16(-0x1p+0)]; tensor var_5815 = mul(x = x2_39, y = const_208_promoted)[name = string("op_5815")]; int32 var_5817 = const()[name = string("op_5817"), val = int32(-1)]; bool var_5818_interleave_0 = const()[name = string("op_5818_interleave_0"), val = bool(false)]; tensor var_5818 = concat(axis = var_5817, interleave = var_5818_interleave_0, values = (var_5815, x1_39))[name = string("op_5818")]; tensor var_5819 = mul(x = var_5818, y = sin_5)[name = string("op_5819")]; tensor key_states_93 = add(x = var_5794, y = var_5819)[name = string("key_states_93")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([27])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([28])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_164")]; tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (expand_dims_112, concat_165_values1_0, var_1233, concat_165_values3_0))[name = string("concat_165")]; tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_164, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_165, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = key_states_93, x = coreml_update_state_53)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_54")]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([63])]; tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([64])]; int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_168")]; tensor concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = tensor([0])]; tensor concat_169_values3_0 = const()[name = string("concat_169_values3_0"), val = tensor([0])]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (expand_dims_118, concat_169_values1_0, var_1233, concat_169_values3_0))[name = string("concat_169")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75 = transpose(perm = var_5716, x = var_5711)[name = string("transpose_77")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_168, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_169, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = value_states_75, x = coreml_update_state_54)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_55")]; tensor var_5890_begin_0 = const()[name = string("op_5890_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_5890_end_0 = const()[name = string("op_5890_end_0"), val = tensor([28, 8, 1024, 128])]; tensor var_5890_end_mask_0 = const()[name = string("op_5890_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5890_cast_fp16 = slice_by_index(begin = var_5890_begin_0, end = var_5890_end_0, end_mask = var_5890_end_mask_0, x = coreml_update_state_55)[name = string("op_5890_cast_fp16")]; tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_5890_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; tensor var_5897_begin_0 = const()[name = string("op_5897_begin_0"), val = tensor([63, 0, 0, 0])]; tensor var_5897_end_0 = const()[name = string("op_5897_end_0"), val = tensor([64, 8, 1024, 128])]; tensor var_5897_end_mask_0 = const()[name = string("op_5897_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5897_cast_fp16 = slice_by_index(begin = var_5897_begin_0, end = var_5897_end_0, end_mask = var_5897_end_mask_0, x = coreml_update_state_55)[name = string("op_5897_cast_fp16")]; tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_5897_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; tensor x_187_axes_0 = const()[name = string("x_187_axes_0"), val = tensor([1])]; tensor x_187_cast_fp16 = expand_dims(axes = x_187_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_5926 = const()[name = string("op_5926"), val = tensor([1, 4, 1, 1])]; tensor x_189_cast_fp16 = tile(reps = var_5926, x = x_187_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_5938 = const()[name = string("op_5938"), val = tensor([1, -1, 1024, 128])]; tensor key_states_97_cast_fp16 = reshape(shape = var_5938, x = x_189_cast_fp16)[name = string("key_states_97_cast_fp16")]; tensor x_193_axes_0 = const()[name = string("x_193_axes_0"), val = tensor([1])]; tensor x_193_cast_fp16 = expand_dims(axes = x_193_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_5946 = const()[name = string("op_5946"), val = tensor([1, 4, 1, 1])]; tensor x_195_cast_fp16 = tile(reps = var_5946, x = x_193_cast_fp16)[name = string("x_195_cast_fp16")]; bool var_5973_transpose_x_0 = const()[name = string("op_5973_transpose_x_0"), val = bool(false)]; bool var_5973_transpose_y_0 = const()[name = string("op_5973_transpose_y_0"), val = bool(true)]; tensor var_5973 = matmul(transpose_x = var_5973_transpose_x_0, transpose_y = var_5973_transpose_y_0, x = query_states_75, y = key_states_97_cast_fp16)[name = string("op_5973")]; fp16 var_5974_to_fp16 = const()[name = string("op_5974_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_5973, y = var_5974_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_6009 = const()[name = string("op_6009"), val = int32(-1)]; tensor var_6011_cast_fp16 = softmax(axis = var_6009, x = attn_weights_39_cast_fp16)[name = string("op_6011_cast_fp16")]; tensor concat_174 = const()[name = string("concat_174"), val = tensor([32, 64, 1024])]; tensor reshape_27_cast_fp16 = reshape(shape = concat_174, x = var_6011_cast_fp16)[name = string("reshape_27_cast_fp16")]; tensor concat_175 = const()[name = string("concat_175"), val = tensor([32, 1024, 128])]; tensor reshape_28_cast_fp16 = reshape(shape = concat_175, x = x_195_cast_fp16)[name = string("reshape_28_cast_fp16")]; bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; tensor concat_179 = const()[name = string("concat_179"), val = tensor([1, 32, 64, 128])]; tensor reshape_29_cast_fp16 = reshape(shape = concat_179, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; tensor var_6023_perm_0 = const()[name = string("op_6023_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6042 = const()[name = string("op_6042"), val = tensor([1, 64, 4096])]; tensor var_6023_cast_fp16 = transpose(perm = var_6023_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_76")]; tensor attn_output_95_cast_fp16 = reshape(shape = var_6042, x = var_6023_cast_fp16)[name = string("attn_output_95_cast_fp16")]; tensor var_6047 = const()[name = string("op_6047"), val = tensor([0, 2, 1])]; string var_6063_pad_type_0 = const()[name = string("op_6063_pad_type_0"), val = string("valid")]; int32 var_6063_groups_0 = const()[name = string("op_6063_groups_0"), val = int32(1)]; tensor var_6063_strides_0 = const()[name = string("op_6063_strides_0"), val = tensor([1])]; tensor var_6063_pad_0 = const()[name = string("op_6063_pad_0"), val = tensor([0, 0])]; tensor var_6063_dilations_0 = const()[name = string("op_6063_dilations_0"), val = tensor([1])]; tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896937216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902180160))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6048_cast_fp16 = transpose(perm = var_6047, x = attn_output_95_cast_fp16)[name = string("transpose_75")]; tensor var_6063_cast_fp16 = conv(dilations = var_6063_dilations_0, groups = var_6063_groups_0, pad = var_6063_pad_0, pad_type = var_6063_pad_type_0, strides = var_6063_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6048_cast_fp16)[name = string("op_6063_cast_fp16")]; tensor var_6067 = const()[name = string("op_6067"), val = tensor([0, 2, 1])]; tensor attn_output_99_cast_fp16 = transpose(perm = var_6067, x = var_6063_cast_fp16)[name = string("transpose_74")]; tensor hidden_states_59_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor mean_79_axes_0 = const()[name = string("mean_79_axes_0"), val = tensor([-1])]; bool mean_79_keep_dims_0 = const()[name = string("mean_79_keep_dims_0"), val = bool(true)]; tensor mean_79_cast_fp16 = reduce_mean(axes = mean_79_axes_0, keep_dims = mean_79_keep_dims_0, x = hidden_states_59_cast_fp16)[name = string("mean_79_cast_fp16")]; tensor input_173_cast_fp16 = sub(x = hidden_states_59_cast_fp16, y = mean_79_cast_fp16)[name = string("input_173_cast_fp16")]; tensor var_6086_axes_0 = const()[name = string("op_6086_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902190464)))]; fp16 var_6074_to_fp16 = const()[name = string("op_6074_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6086_cast_fp16 = layer_norm(axes = var_6086_axes_0, epsilon = var_6074_to_fp16, gamma = model_model_layers_27_post_attention_layernorm_weight_to_fp16, x = input_173_cast_fp16)[name = string("op_6086_cast_fp16")]; tensor var_6100 = const()[name = string("op_6100"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_6101 = transpose(perm = var_6100, x = var_6086_cast_fp16)[name = string("transpose_73")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_6101)[name = string("input_175")]; string input_177_pad_type_0 = const()[name = string("input_177_pad_type_0"), val = string("valid")]; tensor input_177_strides_0 = const()[name = string("input_177_strides_0"), val = tensor([1, 1])]; tensor input_177_pad_0 = const()[name = string("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_177_dilations_0 = const()[name = string("input_177_dilations_0"), val = tensor([1, 1])]; int32 input_177_groups_0 = const()[name = string("input_177_groups_0"), val = int32(1)]; tensor input_177 = conv(dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = model_model_layers_27_mlp_gate_proj_weight_palettized, x = input_175)[name = string("input_177")]; string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_27_mlp_up_proj_weight_palettized, x = input_175)[name = string("b_19")]; tensor c_19 = silu(x = input_177)[name = string("c_19")]; tensor input_179 = mul(x = c_19, y = b_19)[name = string("input_179")]; string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; tensor e_19 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_27_mlp_down_proj_weight_palettized, x = input_179)[name = string("e_19")]; tensor var_6123_axes_0 = const()[name = string("op_6123_axes_0"), val = tensor([2])]; tensor var_6123 = squeeze(axes = var_6123_axes_0, x = e_19)[name = string("op_6123")]; tensor var_6124 = const()[name = string("op_6124"), val = tensor([0, 2, 1])]; tensor var_6125 = transpose(perm = var_6124, x = var_6123)[name = string("transpose_72")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = var_6125)[name = string("hidden_states_61_cast_fp16")]; tensor mean_81_axes_0 = const()[name = string("mean_81_axes_0"), val = tensor([-1])]; bool mean_81_keep_dims_0 = const()[name = string("mean_81_keep_dims_0"), val = bool(true)]; tensor mean_81_cast_fp16 = reduce_mean(axes = mean_81_axes_0, keep_dims = mean_81_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_81_cast_fp16")]; tensor input_181_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_81_cast_fp16)[name = string("input_181_cast_fp16")]; tensor var_6143_axes_0 = const()[name = string("op_6143_axes_0"), val = tensor([-1])]; tensor model_model_layers_28_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_28_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902195648)))]; fp16 var_6131_to_fp16 = const()[name = string("op_6131_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6143_cast_fp16 = layer_norm(axes = var_6143_axes_0, epsilon = var_6131_to_fp16, gamma = model_model_layers_28_input_layernorm_weight_to_fp16, x = input_181_cast_fp16)[name = string("op_6143_cast_fp16")]; tensor var_6155 = const()[name = string("op_6155"), val = tensor([0, 2, 1])]; tensor var_6158_axes_0 = const()[name = string("op_6158_axes_0"), val = tensor([2])]; tensor var_6156 = transpose(perm = var_6155, x = var_6143_cast_fp16)[name = string("transpose_71")]; tensor var_6158 = expand_dims(axes = var_6158_axes_0, x = var_6156)[name = string("op_6158")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor query_states_81 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = model_model_layers_28_self_attn_q_proj_weight_palettized, x = var_6158)[name = string("query_states_81")]; string key_states_101_pad_type_0 = const()[name = string("key_states_101_pad_type_0"), val = string("valid")]; tensor key_states_101_strides_0 = const()[name = string("key_states_101_strides_0"), val = tensor([1, 1])]; tensor key_states_101_pad_0 = const()[name = string("key_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_101_dilations_0 = const()[name = string("key_states_101_dilations_0"), val = tensor([1, 1])]; int32 key_states_101_groups_0 = const()[name = string("key_states_101_groups_0"), val = int32(1)]; tensor key_states_101 = conv(dilations = key_states_101_dilations_0, groups = key_states_101_groups_0, pad = key_states_101_pad_0, pad_type = key_states_101_pad_type_0, strides = key_states_101_strides_0, weight = model_model_layers_28_self_attn_k_proj_weight_palettized, x = var_6158)[name = string("key_states_101")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor value_states_81 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = model_model_layers_28_self_attn_v_proj_weight_palettized, x = var_6158)[name = string("value_states_81")]; tensor var_6200 = const()[name = string("op_6200"), val = tensor([1, 32, 128, 64])]; tensor var_6201 = reshape(shape = var_6200, x = query_states_81)[name = string("op_6201")]; tensor var_6206 = const()[name = string("op_6206"), val = tensor([0, 1, 3, 2])]; tensor var_6211 = const()[name = string("op_6211"), val = tensor([1, 8, 128, 64])]; tensor var_6212 = reshape(shape = var_6211, x = key_states_101)[name = string("op_6212")]; tensor var_6217 = const()[name = string("op_6217"), val = tensor([0, 1, 3, 2])]; tensor var_6222 = const()[name = string("op_6222"), val = tensor([1, 8, 128, 64])]; tensor var_6223 = reshape(shape = var_6222, x = value_states_81)[name = string("op_6223")]; tensor var_6228 = const()[name = string("op_6228"), val = tensor([0, 1, 3, 2])]; tensor mean_83_axes_0 = const()[name = string("mean_83_axes_0"), val = tensor([-1])]; bool mean_83_keep_dims_0 = const()[name = string("mean_83_keep_dims_0"), val = bool(true)]; tensor x_201 = transpose(perm = var_6206, x = var_6201)[name = string("transpose_70")]; tensor mean_83 = reduce_mean(axes = mean_83_axes_0, keep_dims = mean_83_keep_dims_0, x = x_201)[name = string("mean_83")]; tensor input_185 = sub(x = x_201, y = mean_83)[name = string("input_185")]; tensor var_6245_axes_0 = const()[name = string("op_6245_axes_0"), val = tensor([-1])]; tensor model_model_layers_28_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_28_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902200832)))]; fp16 var_6233_to_fp16 = const()[name = string("op_6233_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6245_cast_fp16 = layer_norm(axes = var_6245_axes_0, epsilon = var_6233_to_fp16, gamma = model_model_layers_28_self_attn_q_norm_weight_to_fp16, x = input_185)[name = string("op_6245_cast_fp16")]; tensor mean_85_axes_0 = const()[name = string("mean_85_axes_0"), val = tensor([-1])]; bool mean_85_keep_dims_0 = const()[name = string("mean_85_keep_dims_0"), val = bool(true)]; tensor x_203 = transpose(perm = var_6217, x = var_6212)[name = string("transpose_69")]; tensor mean_85 = reduce_mean(axes = mean_85_axes_0, keep_dims = mean_85_keep_dims_0, x = x_203)[name = string("mean_85")]; tensor input_187 = sub(x = x_203, y = mean_85)[name = string("input_187")]; tensor var_6263_axes_0 = const()[name = string("op_6263_axes_0"), val = tensor([-1])]; tensor model_model_layers_28_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_28_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902201152)))]; fp16 var_6251_to_fp16 = const()[name = string("op_6251_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6263_cast_fp16 = layer_norm(axes = var_6263_axes_0, epsilon = var_6251_to_fp16, gamma = model_model_layers_28_self_attn_k_norm_weight_to_fp16, x = input_187)[name = string("op_6263_cast_fp16")]; tensor var_6278 = mul(x = var_6245_cast_fp16, y = cos_5)[name = string("op_6278")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_6245_cast_fp16)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_6245_cast_fp16)[name = string("x2_41")]; fp16 const_227_promoted = const()[name = string("const_227_promoted"), val = fp16(-0x1p+0)]; tensor var_6299 = mul(x = x2_41, y = const_227_promoted)[name = string("op_6299")]; int32 var_6301 = const()[name = string("op_6301"), val = int32(-1)]; bool var_6302_interleave_0 = const()[name = string("op_6302_interleave_0"), val = bool(false)]; tensor var_6302 = concat(axis = var_6301, interleave = var_6302_interleave_0, values = (var_6299, x1_41))[name = string("op_6302")]; tensor var_6303 = mul(x = var_6302, y = sin_5)[name = string("op_6303")]; tensor query_states_83 = add(x = var_6278, y = var_6303)[name = string("query_states_83")]; tensor var_6306 = mul(x = var_6263_cast_fp16, y = cos_5)[name = string("op_6306")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_6263_cast_fp16)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_6263_cast_fp16)[name = string("x2_43")]; fp16 const_230_promoted = const()[name = string("const_230_promoted"), val = fp16(-0x1p+0)]; tensor var_6327 = mul(x = x2_43, y = const_230_promoted)[name = string("op_6327")]; int32 var_6329 = const()[name = string("op_6329"), val = int32(-1)]; bool var_6330_interleave_0 = const()[name = string("op_6330_interleave_0"), val = bool(false)]; tensor var_6330 = concat(axis = var_6329, interleave = var_6330_interleave_0, values = (var_6327, x1_43))[name = string("op_6330")]; tensor var_6331 = mul(x = var_6330, y = sin_5)[name = string("op_6331")]; tensor key_states_103 = add(x = var_6306, y = var_6331)[name = string("key_states_103")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([28])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([29])]; int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_182")]; tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_124, concat_183_values1_0, var_1233, concat_183_values3_0))[name = string("concat_183")]; tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = key_states_103, x = coreml_update_state_55)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_56")]; tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([64])]; tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([65])]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_186")]; tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_130, concat_187_values1_0, var_1233, concat_187_values3_0))[name = string("concat_187")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83 = transpose(perm = var_6228, x = var_6223)[name = string("transpose_68")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = value_states_83, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_57")]; tensor var_6402_begin_0 = const()[name = string("op_6402_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_6402_end_0 = const()[name = string("op_6402_end_0"), val = tensor([29, 8, 1024, 128])]; tensor var_6402_end_mask_0 = const()[name = string("op_6402_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6402_cast_fp16 = slice_by_index(begin = var_6402_begin_0, end = var_6402_end_0, end_mask = var_6402_end_mask_0, x = coreml_update_state_57)[name = string("op_6402_cast_fp16")]; tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_6402_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; tensor var_6409_begin_0 = const()[name = string("op_6409_begin_0"), val = tensor([64, 0, 0, 0])]; tensor var_6409_end_0 = const()[name = string("op_6409_end_0"), val = tensor([65, 8, 1024, 128])]; tensor var_6409_end_mask_0 = const()[name = string("op_6409_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6409_cast_fp16 = slice_by_index(begin = var_6409_begin_0, end = var_6409_end_0, end_mask = var_6409_end_mask_0, x = coreml_update_state_57)[name = string("op_6409_cast_fp16")]; tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_6409_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_6438 = const()[name = string("op_6438"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_6438, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_6450 = const()[name = string("op_6450"), val = tensor([1, -1, 1024, 128])]; tensor key_states_107_cast_fp16 = reshape(shape = var_6450, x = x_209_cast_fp16)[name = string("key_states_107_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_6458 = const()[name = string("op_6458"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_6458, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; bool var_6485_transpose_x_0 = const()[name = string("op_6485_transpose_x_0"), val = bool(false)]; bool var_6485_transpose_y_0 = const()[name = string("op_6485_transpose_y_0"), val = bool(true)]; tensor var_6485 = matmul(transpose_x = var_6485_transpose_x_0, transpose_y = var_6485_transpose_y_0, x = query_states_83, y = key_states_107_cast_fp16)[name = string("op_6485")]; fp16 var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_41_cast_fp16 = mul(x = var_6485, y = var_6486_to_fp16)[name = string("attn_weights_41_cast_fp16")]; tensor attn_weights_43_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask)[name = string("attn_weights_43_cast_fp16")]; int32 var_6521 = const()[name = string("op_6521"), val = int32(-1)]; tensor var_6523_cast_fp16 = softmax(axis = var_6521, x = attn_weights_43_cast_fp16)[name = string("op_6523_cast_fp16")]; tensor concat_192 = const()[name = string("concat_192"), val = tensor([32, 64, 1024])]; tensor reshape_30_cast_fp16 = reshape(shape = concat_192, x = var_6523_cast_fp16)[name = string("reshape_30_cast_fp16")]; tensor concat_193 = const()[name = string("concat_193"), val = tensor([32, 1024, 128])]; tensor reshape_31_cast_fp16 = reshape(shape = concat_193, x = x_215_cast_fp16)[name = string("reshape_31_cast_fp16")]; bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; tensor concat_197 = const()[name = string("concat_197"), val = tensor([1, 32, 64, 128])]; tensor reshape_32_cast_fp16 = reshape(shape = concat_197, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; tensor var_6535_perm_0 = const()[name = string("op_6535_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6554 = const()[name = string("op_6554"), val = tensor([1, 64, 4096])]; tensor var_6535_cast_fp16 = transpose(perm = var_6535_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_67")]; tensor attn_output_105_cast_fp16 = reshape(shape = var_6554, x = var_6535_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_6559 = const()[name = string("op_6559"), val = tensor([0, 2, 1])]; string var_6575_pad_type_0 = const()[name = string("op_6575_pad_type_0"), val = string("valid")]; int32 var_6575_groups_0 = const()[name = string("op_6575_groups_0"), val = int32(1)]; tensor var_6575_strides_0 = const()[name = string("op_6575_strides_0"), val = tensor([1])]; tensor var_6575_pad_0 = const()[name = string("op_6575_pad_0"), val = tensor([0, 0])]; tensor var_6575_dilations_0 = const()[name = string("op_6575_dilations_0"), val = tensor([1])]; tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902201472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907444416))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6560_cast_fp16 = transpose(perm = var_6559, x = attn_output_105_cast_fp16)[name = string("transpose_66")]; tensor var_6575_cast_fp16 = conv(dilations = var_6575_dilations_0, groups = var_6575_groups_0, pad = var_6575_pad_0, pad_type = var_6575_pad_type_0, strides = var_6575_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_6560_cast_fp16)[name = string("op_6575_cast_fp16")]; tensor var_6579 = const()[name = string("op_6579"), val = tensor([0, 2, 1])]; tensor attn_output_109_cast_fp16 = transpose(perm = var_6579, x = var_6575_cast_fp16)[name = string("transpose_65")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor mean_87_axes_0 = const()[name = string("mean_87_axes_0"), val = tensor([-1])]; bool mean_87_keep_dims_0 = const()[name = string("mean_87_keep_dims_0"), val = bool(true)]; tensor mean_87_cast_fp16 = reduce_mean(axes = mean_87_axes_0, keep_dims = mean_87_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_87_cast_fp16")]; tensor input_191_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_87_cast_fp16)[name = string("input_191_cast_fp16")]; tensor var_6598_axes_0 = const()[name = string("op_6598_axes_0"), val = tensor([-1])]; tensor model_model_layers_28_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_28_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907454720)))]; fp16 var_6586_to_fp16 = const()[name = string("op_6586_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6598_cast_fp16 = layer_norm(axes = var_6598_axes_0, epsilon = var_6586_to_fp16, gamma = model_model_layers_28_post_attention_layernorm_weight_to_fp16, x = input_191_cast_fp16)[name = string("op_6598_cast_fp16")]; tensor var_6612 = const()[name = string("op_6612"), val = tensor([0, 2, 1])]; tensor input_193_axes_0 = const()[name = string("input_193_axes_0"), val = tensor([2])]; tensor var_6613 = transpose(perm = var_6612, x = var_6598_cast_fp16)[name = string("transpose_64")]; tensor input_193 = expand_dims(axes = input_193_axes_0, x = var_6613)[name = string("input_193")]; string input_195_pad_type_0 = const()[name = string("input_195_pad_type_0"), val = string("valid")]; tensor input_195_strides_0 = const()[name = string("input_195_strides_0"), val = tensor([1, 1])]; tensor input_195_pad_0 = const()[name = string("input_195_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_195_dilations_0 = const()[name = string("input_195_dilations_0"), val = tensor([1, 1])]; int32 input_195_groups_0 = const()[name = string("input_195_groups_0"), val = int32(1)]; tensor input_195 = conv(dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = model_model_layers_28_mlp_gate_proj_weight_palettized, x = input_193)[name = string("input_195")]; string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_28_mlp_up_proj_weight_palettized, x = input_193)[name = string("b_21")]; tensor c_21 = silu(x = input_195)[name = string("c_21")]; tensor input_197 = mul(x = c_21, y = b_21)[name = string("input_197")]; string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; tensor e_21 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_28_mlp_down_proj_weight_palettized, x = input_197)[name = string("e_21")]; tensor var_6635_axes_0 = const()[name = string("op_6635_axes_0"), val = tensor([2])]; tensor var_6635 = squeeze(axes = var_6635_axes_0, x = e_21)[name = string("op_6635")]; tensor var_6636 = const()[name = string("op_6636"), val = tensor([0, 2, 1])]; tensor var_6637 = transpose(perm = var_6636, x = var_6635)[name = string("transpose_63")]; tensor hidden_states_67_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = var_6637)[name = string("hidden_states_67_cast_fp16")]; tensor mean_89_axes_0 = const()[name = string("mean_89_axes_0"), val = tensor([-1])]; bool mean_89_keep_dims_0 = const()[name = string("mean_89_keep_dims_0"), val = bool(true)]; tensor mean_89_cast_fp16 = reduce_mean(axes = mean_89_axes_0, keep_dims = mean_89_keep_dims_0, x = hidden_states_67_cast_fp16)[name = string("mean_89_cast_fp16")]; tensor input_199_cast_fp16 = sub(x = hidden_states_67_cast_fp16, y = mean_89_cast_fp16)[name = string("input_199_cast_fp16")]; tensor var_6655_axes_0 = const()[name = string("op_6655_axes_0"), val = tensor([-1])]; tensor model_model_layers_29_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_29_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907459904)))]; fp16 var_6643_to_fp16 = const()[name = string("op_6643_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6655_cast_fp16 = layer_norm(axes = var_6655_axes_0, epsilon = var_6643_to_fp16, gamma = model_model_layers_29_input_layernorm_weight_to_fp16, x = input_199_cast_fp16)[name = string("op_6655_cast_fp16")]; tensor var_6667 = const()[name = string("op_6667"), val = tensor([0, 2, 1])]; tensor var_6670_axes_0 = const()[name = string("op_6670_axes_0"), val = tensor([2])]; tensor var_6668 = transpose(perm = var_6667, x = var_6655_cast_fp16)[name = string("transpose_62")]; tensor var_6670 = expand_dims(axes = var_6670_axes_0, x = var_6668)[name = string("op_6670")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor query_states_89 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = model_model_layers_29_self_attn_q_proj_weight_palettized, x = var_6670)[name = string("query_states_89")]; string key_states_111_pad_type_0 = const()[name = string("key_states_111_pad_type_0"), val = string("valid")]; tensor key_states_111_strides_0 = const()[name = string("key_states_111_strides_0"), val = tensor([1, 1])]; tensor key_states_111_pad_0 = const()[name = string("key_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_111_dilations_0 = const()[name = string("key_states_111_dilations_0"), val = tensor([1, 1])]; int32 key_states_111_groups_0 = const()[name = string("key_states_111_groups_0"), val = int32(1)]; tensor key_states_111 = conv(dilations = key_states_111_dilations_0, groups = key_states_111_groups_0, pad = key_states_111_pad_0, pad_type = key_states_111_pad_type_0, strides = key_states_111_strides_0, weight = model_model_layers_29_self_attn_k_proj_weight_palettized, x = var_6670)[name = string("key_states_111")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor value_states_89 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = model_model_layers_29_self_attn_v_proj_weight_palettized, x = var_6670)[name = string("value_states_89")]; tensor var_6712 = const()[name = string("op_6712"), val = tensor([1, 32, 128, 64])]; tensor var_6713 = reshape(shape = var_6712, x = query_states_89)[name = string("op_6713")]; tensor var_6718 = const()[name = string("op_6718"), val = tensor([0, 1, 3, 2])]; tensor var_6723 = const()[name = string("op_6723"), val = tensor([1, 8, 128, 64])]; tensor var_6724 = reshape(shape = var_6723, x = key_states_111)[name = string("op_6724")]; tensor var_6729 = const()[name = string("op_6729"), val = tensor([0, 1, 3, 2])]; tensor var_6734 = const()[name = string("op_6734"), val = tensor([1, 8, 128, 64])]; tensor var_6735 = reshape(shape = var_6734, x = value_states_89)[name = string("op_6735")]; tensor var_6740 = const()[name = string("op_6740"), val = tensor([0, 1, 3, 2])]; tensor mean_91_axes_0 = const()[name = string("mean_91_axes_0"), val = tensor([-1])]; bool mean_91_keep_dims_0 = const()[name = string("mean_91_keep_dims_0"), val = bool(true)]; tensor x_221 = transpose(perm = var_6718, x = var_6713)[name = string("transpose_61")]; tensor mean_91 = reduce_mean(axes = mean_91_axes_0, keep_dims = mean_91_keep_dims_0, x = x_221)[name = string("mean_91")]; tensor input_203 = sub(x = x_221, y = mean_91)[name = string("input_203")]; tensor var_6757_axes_0 = const()[name = string("op_6757_axes_0"), val = tensor([-1])]; tensor model_model_layers_29_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_29_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465088)))]; fp16 var_6745_to_fp16 = const()[name = string("op_6745_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6757_cast_fp16 = layer_norm(axes = var_6757_axes_0, epsilon = var_6745_to_fp16, gamma = model_model_layers_29_self_attn_q_norm_weight_to_fp16, x = input_203)[name = string("op_6757_cast_fp16")]; tensor mean_93_axes_0 = const()[name = string("mean_93_axes_0"), val = tensor([-1])]; bool mean_93_keep_dims_0 = const()[name = string("mean_93_keep_dims_0"), val = bool(true)]; tensor x_223 = transpose(perm = var_6729, x = var_6724)[name = string("transpose_60")]; tensor mean_93 = reduce_mean(axes = mean_93_axes_0, keep_dims = mean_93_keep_dims_0, x = x_223)[name = string("mean_93")]; tensor input_205 = sub(x = x_223, y = mean_93)[name = string("input_205")]; tensor var_6775_axes_0 = const()[name = string("op_6775_axes_0"), val = tensor([-1])]; tensor model_model_layers_29_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_29_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465408)))]; fp16 var_6763_to_fp16 = const()[name = string("op_6763_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6775_cast_fp16 = layer_norm(axes = var_6775_axes_0, epsilon = var_6763_to_fp16, gamma = model_model_layers_29_self_attn_k_norm_weight_to_fp16, x = input_205)[name = string("op_6775_cast_fp16")]; tensor var_6790 = mul(x = var_6757_cast_fp16, y = cos_5)[name = string("op_6790")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_6757_cast_fp16)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_6757_cast_fp16)[name = string("x2_45")]; fp16 const_249_promoted = const()[name = string("const_249_promoted"), val = fp16(-0x1p+0)]; tensor var_6811 = mul(x = x2_45, y = const_249_promoted)[name = string("op_6811")]; int32 var_6813 = const()[name = string("op_6813"), val = int32(-1)]; bool var_6814_interleave_0 = const()[name = string("op_6814_interleave_0"), val = bool(false)]; tensor var_6814 = concat(axis = var_6813, interleave = var_6814_interleave_0, values = (var_6811, x1_45))[name = string("op_6814")]; tensor var_6815 = mul(x = var_6814, y = sin_5)[name = string("op_6815")]; tensor query_states_91 = add(x = var_6790, y = var_6815)[name = string("query_states_91")]; tensor var_6818 = mul(x = var_6775_cast_fp16, y = cos_5)[name = string("op_6818")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_6775_cast_fp16)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_6775_cast_fp16)[name = string("x2_47")]; fp16 const_252_promoted = const()[name = string("const_252_promoted"), val = fp16(-0x1p+0)]; tensor var_6839 = mul(x = x2_47, y = const_252_promoted)[name = string("op_6839")]; int32 var_6841 = const()[name = string("op_6841"), val = int32(-1)]; bool var_6842_interleave_0 = const()[name = string("op_6842_interleave_0"), val = bool(false)]; tensor var_6842 = concat(axis = var_6841, interleave = var_6842_interleave_0, values = (var_6839, x1_47))[name = string("op_6842")]; tensor var_6843 = mul(x = var_6842, y = sin_5)[name = string("op_6843")]; tensor key_states_113 = add(x = var_6818, y = var_6843)[name = string("key_states_113")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([29])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([30])]; int32 concat_200_axis_0 = const()[name = string("concat_200_axis_0"), val = int32(0)]; bool concat_200_interleave_0 = const()[name = string("concat_200_interleave_0"), val = bool(false)]; tensor concat_200 = concat(axis = concat_200_axis_0, interleave = concat_200_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_200")]; tensor concat_201_values1_0 = const()[name = string("concat_201_values1_0"), val = tensor([0])]; tensor concat_201_values3_0 = const()[name = string("concat_201_values3_0"), val = tensor([0])]; int32 concat_201_axis_0 = const()[name = string("concat_201_axis_0"), val = int32(0)]; bool concat_201_interleave_0 = const()[name = string("concat_201_interleave_0"), val = bool(false)]; tensor concat_201 = concat(axis = concat_201_axis_0, interleave = concat_201_interleave_0, values = (expand_dims_136, concat_201_values1_0, var_1233, concat_201_values3_0))[name = string("concat_201")]; tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_200, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_201, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = key_states_113, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_58")]; tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([65])]; tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([66])]; int32 concat_204_axis_0 = const()[name = string("concat_204_axis_0"), val = int32(0)]; bool concat_204_interleave_0 = const()[name = string("concat_204_interleave_0"), val = bool(false)]; tensor concat_204 = concat(axis = concat_204_axis_0, interleave = concat_204_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_204")]; tensor concat_205_values1_0 = const()[name = string("concat_205_values1_0"), val = tensor([0])]; tensor concat_205_values3_0 = const()[name = string("concat_205_values3_0"), val = tensor([0])]; int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (expand_dims_142, concat_205_values1_0, var_1233, concat_205_values3_0))[name = string("concat_205")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91 = transpose(perm = var_6740, x = var_6735)[name = string("transpose_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_204, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_205, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = value_states_91, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_59")]; tensor var_6914_begin_0 = const()[name = string("op_6914_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_6914_end_0 = const()[name = string("op_6914_end_0"), val = tensor([30, 8, 1024, 128])]; tensor var_6914_end_mask_0 = const()[name = string("op_6914_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6914_cast_fp16 = slice_by_index(begin = var_6914_begin_0, end = var_6914_end_0, end_mask = var_6914_end_mask_0, x = coreml_update_state_59)[name = string("op_6914_cast_fp16")]; tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_6914_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; tensor var_6921_begin_0 = const()[name = string("op_6921_begin_0"), val = tensor([65, 0, 0, 0])]; tensor var_6921_end_0 = const()[name = string("op_6921_end_0"), val = tensor([66, 8, 1024, 128])]; tensor var_6921_end_mask_0 = const()[name = string("op_6921_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6921_cast_fp16 = slice_by_index(begin = var_6921_begin_0, end = var_6921_end_0, end_mask = var_6921_end_mask_0, x = coreml_update_state_59)[name = string("op_6921_cast_fp16")]; tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_6921_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; tensor x_227_axes_0 = const()[name = string("x_227_axes_0"), val = tensor([1])]; tensor x_227_cast_fp16 = expand_dims(axes = x_227_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_227_cast_fp16")]; tensor var_6950 = const()[name = string("op_6950"), val = tensor([1, 4, 1, 1])]; tensor x_229_cast_fp16 = tile(reps = var_6950, x = x_227_cast_fp16)[name = string("x_229_cast_fp16")]; tensor var_6962 = const()[name = string("op_6962"), val = tensor([1, -1, 1024, 128])]; tensor key_states_117_cast_fp16 = reshape(shape = var_6962, x = x_229_cast_fp16)[name = string("key_states_117_cast_fp16")]; tensor x_233_axes_0 = const()[name = string("x_233_axes_0"), val = tensor([1])]; tensor x_233_cast_fp16 = expand_dims(axes = x_233_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_233_cast_fp16")]; tensor var_6970 = const()[name = string("op_6970"), val = tensor([1, 4, 1, 1])]; tensor x_235_cast_fp16 = tile(reps = var_6970, x = x_233_cast_fp16)[name = string("x_235_cast_fp16")]; bool var_6997_transpose_x_0 = const()[name = string("op_6997_transpose_x_0"), val = bool(false)]; bool var_6997_transpose_y_0 = const()[name = string("op_6997_transpose_y_0"), val = bool(true)]; tensor var_6997 = matmul(transpose_x = var_6997_transpose_x_0, transpose_y = var_6997_transpose_y_0, x = query_states_91, y = key_states_117_cast_fp16)[name = string("op_6997")]; fp16 var_6998_to_fp16 = const()[name = string("op_6998_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_45_cast_fp16 = mul(x = var_6997, y = var_6998_to_fp16)[name = string("attn_weights_45_cast_fp16")]; tensor attn_weights_47_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("attn_weights_47_cast_fp16")]; int32 var_7033 = const()[name = string("op_7033"), val = int32(-1)]; tensor var_7035_cast_fp16 = softmax(axis = var_7033, x = attn_weights_47_cast_fp16)[name = string("op_7035_cast_fp16")]; tensor concat_210 = const()[name = string("concat_210"), val = tensor([32, 64, 1024])]; tensor reshape_33_cast_fp16 = reshape(shape = concat_210, x = var_7035_cast_fp16)[name = string("reshape_33_cast_fp16")]; tensor concat_211 = const()[name = string("concat_211"), val = tensor([32, 1024, 128])]; tensor reshape_34_cast_fp16 = reshape(shape = concat_211, x = x_235_cast_fp16)[name = string("reshape_34_cast_fp16")]; bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; tensor concat_215 = const()[name = string("concat_215"), val = tensor([1, 32, 64, 128])]; tensor reshape_35_cast_fp16 = reshape(shape = concat_215, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; tensor var_7047_perm_0 = const()[name = string("op_7047_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7066 = const()[name = string("op_7066"), val = tensor([1, 64, 4096])]; tensor var_7047_cast_fp16 = transpose(perm = var_7047_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_58")]; tensor attn_output_115_cast_fp16 = reshape(shape = var_7066, x = var_7047_cast_fp16)[name = string("attn_output_115_cast_fp16")]; tensor var_7071 = const()[name = string("op_7071"), val = tensor([0, 2, 1])]; string var_7087_pad_type_0 = const()[name = string("op_7087_pad_type_0"), val = string("valid")]; int32 var_7087_groups_0 = const()[name = string("op_7087_groups_0"), val = int32(1)]; tensor var_7087_strides_0 = const()[name = string("op_7087_strides_0"), val = tensor([1])]; tensor var_7087_pad_0 = const()[name = string("op_7087_pad_0"), val = tensor([0, 0])]; tensor var_7087_dilations_0 = const()[name = string("op_7087_dilations_0"), val = tensor([1])]; tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907465728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912708672))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7072_cast_fp16 = transpose(perm = var_7071, x = attn_output_115_cast_fp16)[name = string("transpose_57")]; tensor var_7087_cast_fp16 = conv(dilations = var_7087_dilations_0, groups = var_7087_groups_0, pad = var_7087_pad_0, pad_type = var_7087_pad_type_0, strides = var_7087_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7072_cast_fp16)[name = string("op_7087_cast_fp16")]; tensor var_7091 = const()[name = string("op_7091"), val = tensor([0, 2, 1])]; tensor attn_output_119_cast_fp16 = transpose(perm = var_7091, x = var_7087_cast_fp16)[name = string("transpose_56")]; tensor hidden_states_71_cast_fp16 = add(x = hidden_states_67_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor mean_95_axes_0 = const()[name = string("mean_95_axes_0"), val = tensor([-1])]; bool mean_95_keep_dims_0 = const()[name = string("mean_95_keep_dims_0"), val = bool(true)]; tensor mean_95_cast_fp16 = reduce_mean(axes = mean_95_axes_0, keep_dims = mean_95_keep_dims_0, x = hidden_states_71_cast_fp16)[name = string("mean_95_cast_fp16")]; tensor input_209_cast_fp16 = sub(x = hidden_states_71_cast_fp16, y = mean_95_cast_fp16)[name = string("input_209_cast_fp16")]; tensor var_7110_axes_0 = const()[name = string("op_7110_axes_0"), val = tensor([-1])]; tensor model_model_layers_29_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_29_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912718976)))]; fp16 var_7098_to_fp16 = const()[name = string("op_7098_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7110_cast_fp16 = layer_norm(axes = var_7110_axes_0, epsilon = var_7098_to_fp16, gamma = model_model_layers_29_post_attention_layernorm_weight_to_fp16, x = input_209_cast_fp16)[name = string("op_7110_cast_fp16")]; tensor var_7124 = const()[name = string("op_7124"), val = tensor([0, 2, 1])]; tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; tensor var_7125 = transpose(perm = var_7124, x = var_7110_cast_fp16)[name = string("transpose_55")]; tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_7125)[name = string("input_211")]; string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = model_model_layers_29_mlp_gate_proj_weight_palettized, x = input_211)[name = string("input_213")]; string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_29_mlp_up_proj_weight_palettized, x = input_211)[name = string("b_23")]; tensor c_23 = silu(x = input_213)[name = string("c_23")]; tensor input_215 = mul(x = c_23, y = b_23)[name = string("input_215")]; string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; tensor e_23 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_29_mlp_down_proj_weight_palettized, x = input_215)[name = string("e_23")]; tensor var_7147_axes_0 = const()[name = string("op_7147_axes_0"), val = tensor([2])]; tensor var_7147 = squeeze(axes = var_7147_axes_0, x = e_23)[name = string("op_7147")]; tensor var_7148 = const()[name = string("op_7148"), val = tensor([0, 2, 1])]; tensor var_7149 = transpose(perm = var_7148, x = var_7147)[name = string("transpose_54")]; tensor hidden_states_73_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = var_7149)[name = string("hidden_states_73_cast_fp16")]; tensor mean_97_axes_0 = const()[name = string("mean_97_axes_0"), val = tensor([-1])]; bool mean_97_keep_dims_0 = const()[name = string("mean_97_keep_dims_0"), val = bool(true)]; tensor mean_97_cast_fp16 = reduce_mean(axes = mean_97_axes_0, keep_dims = mean_97_keep_dims_0, x = hidden_states_73_cast_fp16)[name = string("mean_97_cast_fp16")]; tensor input_217_cast_fp16 = sub(x = hidden_states_73_cast_fp16, y = mean_97_cast_fp16)[name = string("input_217_cast_fp16")]; tensor var_7167_axes_0 = const()[name = string("op_7167_axes_0"), val = tensor([-1])]; tensor model_model_layers_30_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_30_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912724160)))]; fp16 var_7155_to_fp16 = const()[name = string("op_7155_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7167_cast_fp16 = layer_norm(axes = var_7167_axes_0, epsilon = var_7155_to_fp16, gamma = model_model_layers_30_input_layernorm_weight_to_fp16, x = input_217_cast_fp16)[name = string("op_7167_cast_fp16")]; tensor var_7179 = const()[name = string("op_7179"), val = tensor([0, 2, 1])]; tensor var_7182_axes_0 = const()[name = string("op_7182_axes_0"), val = tensor([2])]; tensor var_7180 = transpose(perm = var_7179, x = var_7167_cast_fp16)[name = string("transpose_53")]; tensor var_7182 = expand_dims(axes = var_7182_axes_0, x = var_7180)[name = string("op_7182")]; string query_states_97_pad_type_0 = const()[name = string("query_states_97_pad_type_0"), val = string("valid")]; tensor query_states_97_strides_0 = const()[name = string("query_states_97_strides_0"), val = tensor([1, 1])]; tensor query_states_97_pad_0 = const()[name = string("query_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_97_dilations_0 = const()[name = string("query_states_97_dilations_0"), val = tensor([1, 1])]; int32 query_states_97_groups_0 = const()[name = string("query_states_97_groups_0"), val = int32(1)]; tensor query_states_97 = conv(dilations = query_states_97_dilations_0, groups = query_states_97_groups_0, pad = query_states_97_pad_0, pad_type = query_states_97_pad_type_0, strides = query_states_97_strides_0, weight = model_model_layers_30_self_attn_q_proj_weight_palettized, x = var_7182)[name = string("query_states_97")]; string key_states_121_pad_type_0 = const()[name = string("key_states_121_pad_type_0"), val = string("valid")]; tensor key_states_121_strides_0 = const()[name = string("key_states_121_strides_0"), val = tensor([1, 1])]; tensor key_states_121_pad_0 = const()[name = string("key_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_121_dilations_0 = const()[name = string("key_states_121_dilations_0"), val = tensor([1, 1])]; int32 key_states_121_groups_0 = const()[name = string("key_states_121_groups_0"), val = int32(1)]; tensor key_states_121 = conv(dilations = key_states_121_dilations_0, groups = key_states_121_groups_0, pad = key_states_121_pad_0, pad_type = key_states_121_pad_type_0, strides = key_states_121_strides_0, weight = model_model_layers_30_self_attn_k_proj_weight_palettized, x = var_7182)[name = string("key_states_121")]; string value_states_97_pad_type_0 = const()[name = string("value_states_97_pad_type_0"), val = string("valid")]; tensor value_states_97_strides_0 = const()[name = string("value_states_97_strides_0"), val = tensor([1, 1])]; tensor value_states_97_pad_0 = const()[name = string("value_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_97_dilations_0 = const()[name = string("value_states_97_dilations_0"), val = tensor([1, 1])]; int32 value_states_97_groups_0 = const()[name = string("value_states_97_groups_0"), val = int32(1)]; tensor value_states_97 = conv(dilations = value_states_97_dilations_0, groups = value_states_97_groups_0, pad = value_states_97_pad_0, pad_type = value_states_97_pad_type_0, strides = value_states_97_strides_0, weight = model_model_layers_30_self_attn_v_proj_weight_palettized, x = var_7182)[name = string("value_states_97")]; tensor var_7224 = const()[name = string("op_7224"), val = tensor([1, 32, 128, 64])]; tensor var_7225 = reshape(shape = var_7224, x = query_states_97)[name = string("op_7225")]; tensor var_7230 = const()[name = string("op_7230"), val = tensor([0, 1, 3, 2])]; tensor var_7235 = const()[name = string("op_7235"), val = tensor([1, 8, 128, 64])]; tensor var_7236 = reshape(shape = var_7235, x = key_states_121)[name = string("op_7236")]; tensor var_7241 = const()[name = string("op_7241"), val = tensor([0, 1, 3, 2])]; tensor var_7246 = const()[name = string("op_7246"), val = tensor([1, 8, 128, 64])]; tensor var_7247 = reshape(shape = var_7246, x = value_states_97)[name = string("op_7247")]; tensor var_7252 = const()[name = string("op_7252"), val = tensor([0, 1, 3, 2])]; tensor mean_99_axes_0 = const()[name = string("mean_99_axes_0"), val = tensor([-1])]; bool mean_99_keep_dims_0 = const()[name = string("mean_99_keep_dims_0"), val = bool(true)]; tensor x_241 = transpose(perm = var_7230, x = var_7225)[name = string("transpose_52")]; tensor mean_99 = reduce_mean(axes = mean_99_axes_0, keep_dims = mean_99_keep_dims_0, x = x_241)[name = string("mean_99")]; tensor input_221 = sub(x = x_241, y = mean_99)[name = string("input_221")]; tensor var_7269_axes_0 = const()[name = string("op_7269_axes_0"), val = tensor([-1])]; tensor model_model_layers_30_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_30_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729344)))]; fp16 var_7257_to_fp16 = const()[name = string("op_7257_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7269_cast_fp16 = layer_norm(axes = var_7269_axes_0, epsilon = var_7257_to_fp16, gamma = model_model_layers_30_self_attn_q_norm_weight_to_fp16, x = input_221)[name = string("op_7269_cast_fp16")]; tensor mean_101_axes_0 = const()[name = string("mean_101_axes_0"), val = tensor([-1])]; bool mean_101_keep_dims_0 = const()[name = string("mean_101_keep_dims_0"), val = bool(true)]; tensor x_243 = transpose(perm = var_7241, x = var_7236)[name = string("transpose_51")]; tensor mean_101 = reduce_mean(axes = mean_101_axes_0, keep_dims = mean_101_keep_dims_0, x = x_243)[name = string("mean_101")]; tensor input_223 = sub(x = x_243, y = mean_101)[name = string("input_223")]; tensor var_7287_axes_0 = const()[name = string("op_7287_axes_0"), val = tensor([-1])]; tensor model_model_layers_30_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_30_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729664)))]; fp16 var_7275_to_fp16 = const()[name = string("op_7275_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7287_cast_fp16 = layer_norm(axes = var_7287_axes_0, epsilon = var_7275_to_fp16, gamma = model_model_layers_30_self_attn_k_norm_weight_to_fp16, x = input_223)[name = string("op_7287_cast_fp16")]; tensor var_7302 = mul(x = var_7269_cast_fp16, y = cos_5)[name = string("op_7302")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_7269_cast_fp16)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_7269_cast_fp16)[name = string("x2_49")]; fp16 const_271_promoted = const()[name = string("const_271_promoted"), val = fp16(-0x1p+0)]; tensor var_7323 = mul(x = x2_49, y = const_271_promoted)[name = string("op_7323")]; int32 var_7325 = const()[name = string("op_7325"), val = int32(-1)]; bool var_7326_interleave_0 = const()[name = string("op_7326_interleave_0"), val = bool(false)]; tensor var_7326 = concat(axis = var_7325, interleave = var_7326_interleave_0, values = (var_7323, x1_49))[name = string("op_7326")]; tensor var_7327 = mul(x = var_7326, y = sin_5)[name = string("op_7327")]; tensor query_states_99 = add(x = var_7302, y = var_7327)[name = string("query_states_99")]; tensor var_7330 = mul(x = var_7287_cast_fp16, y = cos_5)[name = string("op_7330")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_7287_cast_fp16)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_7287_cast_fp16)[name = string("x2_51")]; fp16 const_274_promoted = const()[name = string("const_274_promoted"), val = fp16(-0x1p+0)]; tensor var_7351 = mul(x = x2_51, y = const_274_promoted)[name = string("op_7351")]; int32 var_7353 = const()[name = string("op_7353"), val = int32(-1)]; bool var_7354_interleave_0 = const()[name = string("op_7354_interleave_0"), val = bool(false)]; tensor var_7354 = concat(axis = var_7353, interleave = var_7354_interleave_0, values = (var_7351, x1_51))[name = string("op_7354")]; tensor var_7355 = mul(x = var_7354, y = sin_5)[name = string("op_7355")]; tensor key_states_123 = add(x = var_7330, y = var_7355)[name = string("key_states_123")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([30])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([31])]; int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_218")]; tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_148, concat_219_values1_0, var_1233, concat_219_values3_0))[name = string("concat_219")]; tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = key_states_123, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_60")]; tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([66])]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([67])]; int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_222")]; tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_154, concat_223_values1_0, var_1233, concat_223_values3_0))[name = string("concat_223")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_99 = transpose(perm = var_7252, x = var_7247)[name = string("transpose_50")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = value_states_99, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_61")]; tensor var_7426_begin_0 = const()[name = string("op_7426_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_7426_end_0 = const()[name = string("op_7426_end_0"), val = tensor([31, 8, 1024, 128])]; tensor var_7426_end_mask_0 = const()[name = string("op_7426_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7426_cast_fp16 = slice_by_index(begin = var_7426_begin_0, end = var_7426_end_0, end_mask = var_7426_end_mask_0, x = coreml_update_state_61)[name = string("op_7426_cast_fp16")]; tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_7426_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; tensor var_7433_begin_0 = const()[name = string("op_7433_begin_0"), val = tensor([66, 0, 0, 0])]; tensor var_7433_end_0 = const()[name = string("op_7433_end_0"), val = tensor([67, 8, 1024, 128])]; tensor var_7433_end_mask_0 = const()[name = string("op_7433_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7433_cast_fp16 = slice_by_index(begin = var_7433_begin_0, end = var_7433_end_0, end_mask = var_7433_end_mask_0, x = coreml_update_state_61)[name = string("op_7433_cast_fp16")]; tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_7433_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; tensor x_247_axes_0 = const()[name = string("x_247_axes_0"), val = tensor([1])]; tensor x_247_cast_fp16 = expand_dims(axes = x_247_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_247_cast_fp16")]; tensor var_7462 = const()[name = string("op_7462"), val = tensor([1, 4, 1, 1])]; tensor x_249_cast_fp16 = tile(reps = var_7462, x = x_247_cast_fp16)[name = string("x_249_cast_fp16")]; tensor var_7474 = const()[name = string("op_7474"), val = tensor([1, -1, 1024, 128])]; tensor key_states_127_cast_fp16 = reshape(shape = var_7474, x = x_249_cast_fp16)[name = string("key_states_127_cast_fp16")]; tensor x_253_axes_0 = const()[name = string("x_253_axes_0"), val = tensor([1])]; tensor x_253_cast_fp16 = expand_dims(axes = x_253_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_253_cast_fp16")]; tensor var_7482 = const()[name = string("op_7482"), val = tensor([1, 4, 1, 1])]; tensor x_255_cast_fp16 = tile(reps = var_7482, x = x_253_cast_fp16)[name = string("x_255_cast_fp16")]; bool var_7509_transpose_x_0 = const()[name = string("op_7509_transpose_x_0"), val = bool(false)]; bool var_7509_transpose_y_0 = const()[name = string("op_7509_transpose_y_0"), val = bool(true)]; tensor var_7509 = matmul(transpose_x = var_7509_transpose_x_0, transpose_y = var_7509_transpose_y_0, x = query_states_99, y = key_states_127_cast_fp16)[name = string("op_7509")]; fp16 var_7510_to_fp16 = const()[name = string("op_7510_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_7509, y = var_7510_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_7545 = const()[name = string("op_7545"), val = int32(-1)]; tensor var_7547_cast_fp16 = softmax(axis = var_7545, x = attn_weights_51_cast_fp16)[name = string("op_7547_cast_fp16")]; tensor concat_228 = const()[name = string("concat_228"), val = tensor([32, 64, 1024])]; tensor reshape_36_cast_fp16 = reshape(shape = concat_228, x = var_7547_cast_fp16)[name = string("reshape_36_cast_fp16")]; tensor concat_229 = const()[name = string("concat_229"), val = tensor([32, 1024, 128])]; tensor reshape_37_cast_fp16 = reshape(shape = concat_229, x = x_255_cast_fp16)[name = string("reshape_37_cast_fp16")]; bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; tensor concat_233 = const()[name = string("concat_233"), val = tensor([1, 32, 64, 128])]; tensor reshape_38_cast_fp16 = reshape(shape = concat_233, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; tensor var_7559_perm_0 = const()[name = string("op_7559_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7578 = const()[name = string("op_7578"), val = tensor([1, 64, 4096])]; tensor var_7559_cast_fp16 = transpose(perm = var_7559_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_49")]; tensor attn_output_125_cast_fp16 = reshape(shape = var_7578, x = var_7559_cast_fp16)[name = string("attn_output_125_cast_fp16")]; tensor var_7583 = const()[name = string("op_7583"), val = tensor([0, 2, 1])]; string var_7599_pad_type_0 = const()[name = string("op_7599_pad_type_0"), val = string("valid")]; int32 var_7599_groups_0 = const()[name = string("op_7599_groups_0"), val = int32(1)]; tensor var_7599_strides_0 = const()[name = string("op_7599_strides_0"), val = tensor([1])]; tensor var_7599_pad_0 = const()[name = string("op_7599_pad_0"), val = tensor([0, 0])]; tensor var_7599_dilations_0 = const()[name = string("op_7599_dilations_0"), val = tensor([1])]; tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912729984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917972928))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7584_cast_fp16 = transpose(perm = var_7583, x = attn_output_125_cast_fp16)[name = string("transpose_48")]; tensor var_7599_cast_fp16 = conv(dilations = var_7599_dilations_0, groups = var_7599_groups_0, pad = var_7599_pad_0, pad_type = var_7599_pad_type_0, strides = var_7599_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_7584_cast_fp16)[name = string("op_7599_cast_fp16")]; tensor var_7603 = const()[name = string("op_7603"), val = tensor([0, 2, 1])]; tensor attn_output_129_cast_fp16 = transpose(perm = var_7603, x = var_7599_cast_fp16)[name = string("transpose_47")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor mean_103_axes_0 = const()[name = string("mean_103_axes_0"), val = tensor([-1])]; bool mean_103_keep_dims_0 = const()[name = string("mean_103_keep_dims_0"), val = bool(true)]; tensor mean_103_cast_fp16 = reduce_mean(axes = mean_103_axes_0, keep_dims = mean_103_keep_dims_0, x = hidden_states_77_cast_fp16)[name = string("mean_103_cast_fp16")]; tensor input_227_cast_fp16 = sub(x = hidden_states_77_cast_fp16, y = mean_103_cast_fp16)[name = string("input_227_cast_fp16")]; tensor var_7622_axes_0 = const()[name = string("op_7622_axes_0"), val = tensor([-1])]; tensor model_model_layers_30_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_30_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917983232)))]; fp16 var_7610_to_fp16 = const()[name = string("op_7610_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7622_cast_fp16 = layer_norm(axes = var_7622_axes_0, epsilon = var_7610_to_fp16, gamma = model_model_layers_30_post_attention_layernorm_weight_to_fp16, x = input_227_cast_fp16)[name = string("op_7622_cast_fp16")]; tensor var_7636 = const()[name = string("op_7636"), val = tensor([0, 2, 1])]; tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; tensor var_7637 = transpose(perm = var_7636, x = var_7622_cast_fp16)[name = string("transpose_46")]; tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_7637)[name = string("input_229")]; string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = model_model_layers_30_mlp_gate_proj_weight_palettized, x = input_229)[name = string("input_231")]; string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_30_mlp_up_proj_weight_palettized, x = input_229)[name = string("b_25")]; tensor c_25 = silu(x = input_231)[name = string("c_25")]; tensor input_233 = mul(x = c_25, y = b_25)[name = string("input_233")]; string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; tensor e_25 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_30_mlp_down_proj_weight_palettized, x = input_233)[name = string("e_25")]; tensor var_7659_axes_0 = const()[name = string("op_7659_axes_0"), val = tensor([2])]; tensor var_7659 = squeeze(axes = var_7659_axes_0, x = e_25)[name = string("op_7659")]; tensor var_7660 = const()[name = string("op_7660"), val = tensor([0, 2, 1])]; tensor var_7661 = transpose(perm = var_7660, x = var_7659)[name = string("transpose_45")]; tensor hidden_states_79_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = var_7661)[name = string("hidden_states_79_cast_fp16")]; tensor mean_105_axes_0 = const()[name = string("mean_105_axes_0"), val = tensor([-1])]; bool mean_105_keep_dims_0 = const()[name = string("mean_105_keep_dims_0"), val = bool(true)]; tensor mean_105_cast_fp16 = reduce_mean(axes = mean_105_axes_0, keep_dims = mean_105_keep_dims_0, x = hidden_states_79_cast_fp16)[name = string("mean_105_cast_fp16")]; tensor input_235_cast_fp16 = sub(x = hidden_states_79_cast_fp16, y = mean_105_cast_fp16)[name = string("input_235_cast_fp16")]; tensor var_7679_axes_0 = const()[name = string("op_7679_axes_0"), val = tensor([-1])]; tensor model_model_layers_31_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_31_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917988416)))]; fp16 var_7667_to_fp16 = const()[name = string("op_7667_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7679_cast_fp16 = layer_norm(axes = var_7679_axes_0, epsilon = var_7667_to_fp16, gamma = model_model_layers_31_input_layernorm_weight_to_fp16, x = input_235_cast_fp16)[name = string("op_7679_cast_fp16")]; tensor var_7691 = const()[name = string("op_7691"), val = tensor([0, 2, 1])]; tensor var_7694_axes_0 = const()[name = string("op_7694_axes_0"), val = tensor([2])]; tensor var_7692 = transpose(perm = var_7691, x = var_7679_cast_fp16)[name = string("transpose_44")]; tensor var_7694 = expand_dims(axes = var_7694_axes_0, x = var_7692)[name = string("op_7694")]; string query_states_105_pad_type_0 = const()[name = string("query_states_105_pad_type_0"), val = string("valid")]; tensor query_states_105_strides_0 = const()[name = string("query_states_105_strides_0"), val = tensor([1, 1])]; tensor query_states_105_pad_0 = const()[name = string("query_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_105_dilations_0 = const()[name = string("query_states_105_dilations_0"), val = tensor([1, 1])]; int32 query_states_105_groups_0 = const()[name = string("query_states_105_groups_0"), val = int32(1)]; tensor query_states_105 = conv(dilations = query_states_105_dilations_0, groups = query_states_105_groups_0, pad = query_states_105_pad_0, pad_type = query_states_105_pad_type_0, strides = query_states_105_strides_0, weight = model_model_layers_31_self_attn_q_proj_weight_palettized, x = var_7694)[name = string("query_states_105")]; string key_states_131_pad_type_0 = const()[name = string("key_states_131_pad_type_0"), val = string("valid")]; tensor key_states_131_strides_0 = const()[name = string("key_states_131_strides_0"), val = tensor([1, 1])]; tensor key_states_131_pad_0 = const()[name = string("key_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_131_dilations_0 = const()[name = string("key_states_131_dilations_0"), val = tensor([1, 1])]; int32 key_states_131_groups_0 = const()[name = string("key_states_131_groups_0"), val = int32(1)]; tensor key_states_131 = conv(dilations = key_states_131_dilations_0, groups = key_states_131_groups_0, pad = key_states_131_pad_0, pad_type = key_states_131_pad_type_0, strides = key_states_131_strides_0, weight = model_model_layers_31_self_attn_k_proj_weight_palettized, x = var_7694)[name = string("key_states_131")]; string value_states_105_pad_type_0 = const()[name = string("value_states_105_pad_type_0"), val = string("valid")]; tensor value_states_105_strides_0 = const()[name = string("value_states_105_strides_0"), val = tensor([1, 1])]; tensor value_states_105_pad_0 = const()[name = string("value_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_105_dilations_0 = const()[name = string("value_states_105_dilations_0"), val = tensor([1, 1])]; int32 value_states_105_groups_0 = const()[name = string("value_states_105_groups_0"), val = int32(1)]; tensor value_states_105 = conv(dilations = value_states_105_dilations_0, groups = value_states_105_groups_0, pad = value_states_105_pad_0, pad_type = value_states_105_pad_type_0, strides = value_states_105_strides_0, weight = model_model_layers_31_self_attn_v_proj_weight_palettized, x = var_7694)[name = string("value_states_105")]; tensor var_7736 = const()[name = string("op_7736"), val = tensor([1, 32, 128, 64])]; tensor var_7737 = reshape(shape = var_7736, x = query_states_105)[name = string("op_7737")]; tensor var_7742 = const()[name = string("op_7742"), val = tensor([0, 1, 3, 2])]; tensor var_7747 = const()[name = string("op_7747"), val = tensor([1, 8, 128, 64])]; tensor var_7748 = reshape(shape = var_7747, x = key_states_131)[name = string("op_7748")]; tensor var_7753 = const()[name = string("op_7753"), val = tensor([0, 1, 3, 2])]; tensor var_7758 = const()[name = string("op_7758"), val = tensor([1, 8, 128, 64])]; tensor var_7759 = reshape(shape = var_7758, x = value_states_105)[name = string("op_7759")]; tensor var_7764 = const()[name = string("op_7764"), val = tensor([0, 1, 3, 2])]; tensor mean_107_axes_0 = const()[name = string("mean_107_axes_0"), val = tensor([-1])]; bool mean_107_keep_dims_0 = const()[name = string("mean_107_keep_dims_0"), val = bool(true)]; tensor x_261 = transpose(perm = var_7742, x = var_7737)[name = string("transpose_43")]; tensor mean_107 = reduce_mean(axes = mean_107_axes_0, keep_dims = mean_107_keep_dims_0, x = x_261)[name = string("mean_107")]; tensor input_239 = sub(x = x_261, y = mean_107)[name = string("input_239")]; tensor var_7781_axes_0 = const()[name = string("op_7781_axes_0"), val = tensor([-1])]; tensor model_model_layers_31_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_31_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917993600)))]; fp16 var_7769_to_fp16 = const()[name = string("op_7769_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7781_cast_fp16 = layer_norm(axes = var_7781_axes_0, epsilon = var_7769_to_fp16, gamma = model_model_layers_31_self_attn_q_norm_weight_to_fp16, x = input_239)[name = string("op_7781_cast_fp16")]; tensor mean_109_axes_0 = const()[name = string("mean_109_axes_0"), val = tensor([-1])]; bool mean_109_keep_dims_0 = const()[name = string("mean_109_keep_dims_0"), val = bool(true)]; tensor x_263 = transpose(perm = var_7753, x = var_7748)[name = string("transpose_42")]; tensor mean_109 = reduce_mean(axes = mean_109_axes_0, keep_dims = mean_109_keep_dims_0, x = x_263)[name = string("mean_109")]; tensor input_241 = sub(x = x_263, y = mean_109)[name = string("input_241")]; tensor var_7799_axes_0 = const()[name = string("op_7799_axes_0"), val = tensor([-1])]; tensor model_model_layers_31_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_31_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917993920)))]; fp16 var_7787_to_fp16 = const()[name = string("op_7787_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7799_cast_fp16 = layer_norm(axes = var_7799_axes_0, epsilon = var_7787_to_fp16, gamma = model_model_layers_31_self_attn_k_norm_weight_to_fp16, x = input_241)[name = string("op_7799_cast_fp16")]; tensor var_7814 = mul(x = var_7781_cast_fp16, y = cos_5)[name = string("op_7814")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_7781_cast_fp16)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_7781_cast_fp16)[name = string("x2_53")]; fp16 const_293_promoted = const()[name = string("const_293_promoted"), val = fp16(-0x1p+0)]; tensor var_7835 = mul(x = x2_53, y = const_293_promoted)[name = string("op_7835")]; int32 var_7837 = const()[name = string("op_7837"), val = int32(-1)]; bool var_7838_interleave_0 = const()[name = string("op_7838_interleave_0"), val = bool(false)]; tensor var_7838 = concat(axis = var_7837, interleave = var_7838_interleave_0, values = (var_7835, x1_53))[name = string("op_7838")]; tensor var_7839 = mul(x = var_7838, y = sin_5)[name = string("op_7839")]; tensor query_states_107 = add(x = var_7814, y = var_7839)[name = string("query_states_107")]; tensor var_7842 = mul(x = var_7799_cast_fp16, y = cos_5)[name = string("op_7842")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_7799_cast_fp16)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_7799_cast_fp16)[name = string("x2_55")]; fp16 const_296_promoted = const()[name = string("const_296_promoted"), val = fp16(-0x1p+0)]; tensor var_7863 = mul(x = x2_55, y = const_296_promoted)[name = string("op_7863")]; int32 var_7865 = const()[name = string("op_7865"), val = int32(-1)]; bool var_7866_interleave_0 = const()[name = string("op_7866_interleave_0"), val = bool(false)]; tensor var_7866 = concat(axis = var_7865, interleave = var_7866_interleave_0, values = (var_7863, x1_55))[name = string("op_7866")]; tensor var_7867 = mul(x = var_7866, y = sin_5)[name = string("op_7867")]; tensor key_states_133 = add(x = var_7842, y = var_7867)[name = string("key_states_133")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([31])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([32])]; int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_236")]; tensor concat_237_values1_0 = const()[name = string("concat_237_values1_0"), val = tensor([0])]; tensor concat_237_values3_0 = const()[name = string("concat_237_values3_0"), val = tensor([0])]; int32 concat_237_axis_0 = const()[name = string("concat_237_axis_0"), val = int32(0)]; bool concat_237_interleave_0 = const()[name = string("concat_237_interleave_0"), val = bool(false)]; tensor concat_237 = concat(axis = concat_237_axis_0, interleave = concat_237_interleave_0, values = (expand_dims_160, concat_237_values1_0, var_1233, concat_237_values3_0))[name = string("concat_237")]; tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_236, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_237, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = key_states_133, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_62")]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([67])]; tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([68])]; int32 concat_240_axis_0 = const()[name = string("concat_240_axis_0"), val = int32(0)]; bool concat_240_interleave_0 = const()[name = string("concat_240_interleave_0"), val = bool(false)]; tensor concat_240 = concat(axis = concat_240_axis_0, interleave = concat_240_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_240")]; tensor concat_241_values1_0 = const()[name = string("concat_241_values1_0"), val = tensor([0])]; tensor concat_241_values3_0 = const()[name = string("concat_241_values3_0"), val = tensor([0])]; int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (expand_dims_166, concat_241_values1_0, var_1233, concat_241_values3_0))[name = string("concat_241")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_107 = transpose(perm = var_7764, x = var_7759)[name = string("transpose_41")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_240, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_241, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = value_states_107, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_63")]; tensor var_7938_begin_0 = const()[name = string("op_7938_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_7938_end_0 = const()[name = string("op_7938_end_0"), val = tensor([32, 8, 1024, 128])]; tensor var_7938_end_mask_0 = const()[name = string("op_7938_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7938_cast_fp16 = slice_by_index(begin = var_7938_begin_0, end = var_7938_end_0, end_mask = var_7938_end_mask_0, x = coreml_update_state_63)[name = string("op_7938_cast_fp16")]; tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_7938_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; tensor var_7945_begin_0 = const()[name = string("op_7945_begin_0"), val = tensor([67, 0, 0, 0])]; tensor var_7945_end_0 = const()[name = string("op_7945_end_0"), val = tensor([68, 8, 1024, 128])]; tensor var_7945_end_mask_0 = const()[name = string("op_7945_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7945_cast_fp16 = slice_by_index(begin = var_7945_begin_0, end = var_7945_end_0, end_mask = var_7945_end_mask_0, x = coreml_update_state_63)[name = string("op_7945_cast_fp16")]; tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_7945_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; tensor x_267_axes_0 = const()[name = string("x_267_axes_0"), val = tensor([1])]; tensor x_267_cast_fp16 = expand_dims(axes = x_267_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_267_cast_fp16")]; tensor var_7974 = const()[name = string("op_7974"), val = tensor([1, 4, 1, 1])]; tensor x_269_cast_fp16 = tile(reps = var_7974, x = x_267_cast_fp16)[name = string("x_269_cast_fp16")]; tensor var_7986 = const()[name = string("op_7986"), val = tensor([1, -1, 1024, 128])]; tensor key_states_137_cast_fp16 = reshape(shape = var_7986, x = x_269_cast_fp16)[name = string("key_states_137_cast_fp16")]; tensor x_273_axes_0 = const()[name = string("x_273_axes_0"), val = tensor([1])]; tensor x_273_cast_fp16 = expand_dims(axes = x_273_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_273_cast_fp16")]; tensor var_7994 = const()[name = string("op_7994"), val = tensor([1, 4, 1, 1])]; tensor x_275_cast_fp16 = tile(reps = var_7994, x = x_273_cast_fp16)[name = string("x_275_cast_fp16")]; bool var_8021_transpose_x_0 = const()[name = string("op_8021_transpose_x_0"), val = bool(false)]; bool var_8021_transpose_y_0 = const()[name = string("op_8021_transpose_y_0"), val = bool(true)]; tensor var_8021 = matmul(transpose_x = var_8021_transpose_x_0, transpose_y = var_8021_transpose_y_0, x = query_states_107, y = key_states_137_cast_fp16)[name = string("op_8021")]; fp16 var_8022_to_fp16 = const()[name = string("op_8022_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_53_cast_fp16 = mul(x = var_8021, y = var_8022_to_fp16)[name = string("attn_weights_53_cast_fp16")]; tensor attn_weights_55_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = causal_mask)[name = string("attn_weights_55_cast_fp16")]; int32 var_8057 = const()[name = string("op_8057"), val = int32(-1)]; tensor var_8059_cast_fp16 = softmax(axis = var_8057, x = attn_weights_55_cast_fp16)[name = string("op_8059_cast_fp16")]; tensor concat_246 = const()[name = string("concat_246"), val = tensor([32, 64, 1024])]; tensor reshape_39_cast_fp16 = reshape(shape = concat_246, x = var_8059_cast_fp16)[name = string("reshape_39_cast_fp16")]; tensor concat_247 = const()[name = string("concat_247"), val = tensor([32, 1024, 128])]; tensor reshape_40_cast_fp16 = reshape(shape = concat_247, x = x_275_cast_fp16)[name = string("reshape_40_cast_fp16")]; bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; tensor concat_251 = const()[name = string("concat_251"), val = tensor([1, 32, 64, 128])]; tensor reshape_41_cast_fp16 = reshape(shape = concat_251, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; tensor var_8071_perm_0 = const()[name = string("op_8071_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8090 = const()[name = string("op_8090"), val = tensor([1, 64, 4096])]; tensor var_8071_cast_fp16 = transpose(perm = var_8071_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_40")]; tensor attn_output_135_cast_fp16 = reshape(shape = var_8090, x = var_8071_cast_fp16)[name = string("attn_output_135_cast_fp16")]; tensor var_8095 = const()[name = string("op_8095"), val = tensor([0, 2, 1])]; string var_8111_pad_type_0 = const()[name = string("op_8111_pad_type_0"), val = string("valid")]; int32 var_8111_groups_0 = const()[name = string("op_8111_groups_0"), val = int32(1)]; tensor var_8111_strides_0 = const()[name = string("op_8111_strides_0"), val = tensor([1])]; tensor var_8111_pad_0 = const()[name = string("op_8111_pad_0"), val = tensor([0, 0])]; tensor var_8111_dilations_0 = const()[name = string("op_8111_dilations_0"), val = tensor([1])]; tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917994240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923237184))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8096_cast_fp16 = transpose(perm = var_8095, x = attn_output_135_cast_fp16)[name = string("transpose_39")]; tensor var_8111_cast_fp16 = conv(dilations = var_8111_dilations_0, groups = var_8111_groups_0, pad = var_8111_pad_0, pad_type = var_8111_pad_type_0, strides = var_8111_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_8096_cast_fp16)[name = string("op_8111_cast_fp16")]; tensor var_8115 = const()[name = string("op_8115"), val = tensor([0, 2, 1])]; tensor attn_output_139_cast_fp16 = transpose(perm = var_8115, x = var_8111_cast_fp16)[name = string("transpose_38")]; tensor hidden_states_83_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor mean_111_axes_0 = const()[name = string("mean_111_axes_0"), val = tensor([-1])]; bool mean_111_keep_dims_0 = const()[name = string("mean_111_keep_dims_0"), val = bool(true)]; tensor mean_111_cast_fp16 = reduce_mean(axes = mean_111_axes_0, keep_dims = mean_111_keep_dims_0, x = hidden_states_83_cast_fp16)[name = string("mean_111_cast_fp16")]; tensor input_245_cast_fp16 = sub(x = hidden_states_83_cast_fp16, y = mean_111_cast_fp16)[name = string("input_245_cast_fp16")]; tensor var_8134_axes_0 = const()[name = string("op_8134_axes_0"), val = tensor([-1])]; tensor model_model_layers_31_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_31_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923247488)))]; fp16 var_8122_to_fp16 = const()[name = string("op_8122_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8134_cast_fp16 = layer_norm(axes = var_8134_axes_0, epsilon = var_8122_to_fp16, gamma = model_model_layers_31_post_attention_layernorm_weight_to_fp16, x = input_245_cast_fp16)[name = string("op_8134_cast_fp16")]; tensor var_8148 = const()[name = string("op_8148"), val = tensor([0, 2, 1])]; tensor input_247_axes_0 = const()[name = string("input_247_axes_0"), val = tensor([2])]; tensor var_8149 = transpose(perm = var_8148, x = var_8134_cast_fp16)[name = string("transpose_37")]; tensor input_247 = expand_dims(axes = input_247_axes_0, x = var_8149)[name = string("input_247")]; string input_249_pad_type_0 = const()[name = string("input_249_pad_type_0"), val = string("valid")]; tensor input_249_strides_0 = const()[name = string("input_249_strides_0"), val = tensor([1, 1])]; tensor input_249_pad_0 = const()[name = string("input_249_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_249_dilations_0 = const()[name = string("input_249_dilations_0"), val = tensor([1, 1])]; int32 input_249_groups_0 = const()[name = string("input_249_groups_0"), val = int32(1)]; tensor input_249 = conv(dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = model_model_layers_31_mlp_gate_proj_weight_palettized, x = input_247)[name = string("input_249")]; string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_31_mlp_up_proj_weight_palettized, x = input_247)[name = string("b_27")]; tensor c_27 = silu(x = input_249)[name = string("c_27")]; tensor input_251 = mul(x = c_27, y = b_27)[name = string("input_251")]; string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; tensor e_27 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_31_mlp_down_proj_weight_palettized, x = input_251)[name = string("e_27")]; tensor var_8171_axes_0 = const()[name = string("op_8171_axes_0"), val = tensor([2])]; tensor var_8171 = squeeze(axes = var_8171_axes_0, x = e_27)[name = string("op_8171")]; tensor var_8172 = const()[name = string("op_8172"), val = tensor([0, 2, 1])]; tensor var_8173 = transpose(perm = var_8172, x = var_8171)[name = string("transpose_36")]; tensor hidden_states_85_cast_fp16 = add(x = hidden_states_83_cast_fp16, y = var_8173)[name = string("hidden_states_85_cast_fp16")]; tensor mean_113_axes_0 = const()[name = string("mean_113_axes_0"), val = tensor([-1])]; bool mean_113_keep_dims_0 = const()[name = string("mean_113_keep_dims_0"), val = bool(true)]; tensor mean_113_cast_fp16 = reduce_mean(axes = mean_113_axes_0, keep_dims = mean_113_keep_dims_0, x = hidden_states_85_cast_fp16)[name = string("mean_113_cast_fp16")]; tensor input_253_cast_fp16 = sub(x = hidden_states_85_cast_fp16, y = mean_113_cast_fp16)[name = string("input_253_cast_fp16")]; tensor var_8191_axes_0 = const()[name = string("op_8191_axes_0"), val = tensor([-1])]; tensor model_model_layers_32_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_32_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923252672)))]; fp16 var_8179_to_fp16 = const()[name = string("op_8179_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8191_cast_fp16 = layer_norm(axes = var_8191_axes_0, epsilon = var_8179_to_fp16, gamma = model_model_layers_32_input_layernorm_weight_to_fp16, x = input_253_cast_fp16)[name = string("op_8191_cast_fp16")]; tensor var_8203 = const()[name = string("op_8203"), val = tensor([0, 2, 1])]; tensor var_8206_axes_0 = const()[name = string("op_8206_axes_0"), val = tensor([2])]; tensor var_8204 = transpose(perm = var_8203, x = var_8191_cast_fp16)[name = string("transpose_35")]; tensor var_8206 = expand_dims(axes = var_8206_axes_0, x = var_8204)[name = string("op_8206")]; string query_states_113_pad_type_0 = const()[name = string("query_states_113_pad_type_0"), val = string("valid")]; tensor query_states_113_strides_0 = const()[name = string("query_states_113_strides_0"), val = tensor([1, 1])]; tensor query_states_113_pad_0 = const()[name = string("query_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_113_dilations_0 = const()[name = string("query_states_113_dilations_0"), val = tensor([1, 1])]; int32 query_states_113_groups_0 = const()[name = string("query_states_113_groups_0"), val = int32(1)]; tensor query_states_113 = conv(dilations = query_states_113_dilations_0, groups = query_states_113_groups_0, pad = query_states_113_pad_0, pad_type = query_states_113_pad_type_0, strides = query_states_113_strides_0, weight = model_model_layers_32_self_attn_q_proj_weight_palettized, x = var_8206)[name = string("query_states_113")]; string key_states_141_pad_type_0 = const()[name = string("key_states_141_pad_type_0"), val = string("valid")]; tensor key_states_141_strides_0 = const()[name = string("key_states_141_strides_0"), val = tensor([1, 1])]; tensor key_states_141_pad_0 = const()[name = string("key_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_141_dilations_0 = const()[name = string("key_states_141_dilations_0"), val = tensor([1, 1])]; int32 key_states_141_groups_0 = const()[name = string("key_states_141_groups_0"), val = int32(1)]; tensor key_states_141 = conv(dilations = key_states_141_dilations_0, groups = key_states_141_groups_0, pad = key_states_141_pad_0, pad_type = key_states_141_pad_type_0, strides = key_states_141_strides_0, weight = model_model_layers_32_self_attn_k_proj_weight_palettized, x = var_8206)[name = string("key_states_141")]; string value_states_113_pad_type_0 = const()[name = string("value_states_113_pad_type_0"), val = string("valid")]; tensor value_states_113_strides_0 = const()[name = string("value_states_113_strides_0"), val = tensor([1, 1])]; tensor value_states_113_pad_0 = const()[name = string("value_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_113_dilations_0 = const()[name = string("value_states_113_dilations_0"), val = tensor([1, 1])]; int32 value_states_113_groups_0 = const()[name = string("value_states_113_groups_0"), val = int32(1)]; tensor value_states_113 = conv(dilations = value_states_113_dilations_0, groups = value_states_113_groups_0, pad = value_states_113_pad_0, pad_type = value_states_113_pad_type_0, strides = value_states_113_strides_0, weight = model_model_layers_32_self_attn_v_proj_weight_palettized, x = var_8206)[name = string("value_states_113")]; tensor var_8248 = const()[name = string("op_8248"), val = tensor([1, 32, 128, 64])]; tensor var_8249 = reshape(shape = var_8248, x = query_states_113)[name = string("op_8249")]; tensor var_8254 = const()[name = string("op_8254"), val = tensor([0, 1, 3, 2])]; tensor var_8259 = const()[name = string("op_8259"), val = tensor([1, 8, 128, 64])]; tensor var_8260 = reshape(shape = var_8259, x = key_states_141)[name = string("op_8260")]; tensor var_8265 = const()[name = string("op_8265"), val = tensor([0, 1, 3, 2])]; tensor var_8270 = const()[name = string("op_8270"), val = tensor([1, 8, 128, 64])]; tensor var_8271 = reshape(shape = var_8270, x = value_states_113)[name = string("op_8271")]; tensor var_8276 = const()[name = string("op_8276"), val = tensor([0, 1, 3, 2])]; tensor mean_115_axes_0 = const()[name = string("mean_115_axes_0"), val = tensor([-1])]; bool mean_115_keep_dims_0 = const()[name = string("mean_115_keep_dims_0"), val = bool(true)]; tensor x_281 = transpose(perm = var_8254, x = var_8249)[name = string("transpose_34")]; tensor mean_115 = reduce_mean(axes = mean_115_axes_0, keep_dims = mean_115_keep_dims_0, x = x_281)[name = string("mean_115")]; tensor input_257 = sub(x = x_281, y = mean_115)[name = string("input_257")]; tensor var_8293_axes_0 = const()[name = string("op_8293_axes_0"), val = tensor([-1])]; tensor model_model_layers_32_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_32_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923257856)))]; fp16 var_8281_to_fp16 = const()[name = string("op_8281_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8293_cast_fp16 = layer_norm(axes = var_8293_axes_0, epsilon = var_8281_to_fp16, gamma = model_model_layers_32_self_attn_q_norm_weight_to_fp16, x = input_257)[name = string("op_8293_cast_fp16")]; tensor mean_117_axes_0 = const()[name = string("mean_117_axes_0"), val = tensor([-1])]; bool mean_117_keep_dims_0 = const()[name = string("mean_117_keep_dims_0"), val = bool(true)]; tensor x_283 = transpose(perm = var_8265, x = var_8260)[name = string("transpose_33")]; tensor mean_117 = reduce_mean(axes = mean_117_axes_0, keep_dims = mean_117_keep_dims_0, x = x_283)[name = string("mean_117")]; tensor input_259 = sub(x = x_283, y = mean_117)[name = string("input_259")]; tensor var_8311_axes_0 = const()[name = string("op_8311_axes_0"), val = tensor([-1])]; tensor model_model_layers_32_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_32_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258176)))]; fp16 var_8299_to_fp16 = const()[name = string("op_8299_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8311_cast_fp16 = layer_norm(axes = var_8311_axes_0, epsilon = var_8299_to_fp16, gamma = model_model_layers_32_self_attn_k_norm_weight_to_fp16, x = input_259)[name = string("op_8311_cast_fp16")]; tensor var_8326 = mul(x = var_8293_cast_fp16, y = cos_5)[name = string("op_8326")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_8293_cast_fp16)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_8293_cast_fp16)[name = string("x2_57")]; fp16 const_315_promoted = const()[name = string("const_315_promoted"), val = fp16(-0x1p+0)]; tensor var_8347 = mul(x = x2_57, y = const_315_promoted)[name = string("op_8347")]; int32 var_8349 = const()[name = string("op_8349"), val = int32(-1)]; bool var_8350_interleave_0 = const()[name = string("op_8350_interleave_0"), val = bool(false)]; tensor var_8350 = concat(axis = var_8349, interleave = var_8350_interleave_0, values = (var_8347, x1_57))[name = string("op_8350")]; tensor var_8351 = mul(x = var_8350, y = sin_5)[name = string("op_8351")]; tensor query_states_115 = add(x = var_8326, y = var_8351)[name = string("query_states_115")]; tensor var_8354 = mul(x = var_8311_cast_fp16, y = cos_5)[name = string("op_8354")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_8311_cast_fp16)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_8311_cast_fp16)[name = string("x2_59")]; fp16 const_318_promoted = const()[name = string("const_318_promoted"), val = fp16(-0x1p+0)]; tensor var_8375 = mul(x = x2_59, y = const_318_promoted)[name = string("op_8375")]; int32 var_8377 = const()[name = string("op_8377"), val = int32(-1)]; bool var_8378_interleave_0 = const()[name = string("op_8378_interleave_0"), val = bool(false)]; tensor var_8378 = concat(axis = var_8377, interleave = var_8378_interleave_0, values = (var_8375, x1_59))[name = string("op_8378")]; tensor var_8379 = mul(x = var_8378, y = sin_5)[name = string("op_8379")]; tensor key_states_143 = add(x = var_8354, y = var_8379)[name = string("key_states_143")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([32])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([33])]; int32 concat_254_axis_0 = const()[name = string("concat_254_axis_0"), val = int32(0)]; bool concat_254_interleave_0 = const()[name = string("concat_254_interleave_0"), val = bool(false)]; tensor concat_254 = concat(axis = concat_254_axis_0, interleave = concat_254_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_254")]; tensor concat_255_values1_0 = const()[name = string("concat_255_values1_0"), val = tensor([0])]; tensor concat_255_values3_0 = const()[name = string("concat_255_values3_0"), val = tensor([0])]; int32 concat_255_axis_0 = const()[name = string("concat_255_axis_0"), val = int32(0)]; bool concat_255_interleave_0 = const()[name = string("concat_255_interleave_0"), val = bool(false)]; tensor concat_255 = concat(axis = concat_255_axis_0, interleave = concat_255_interleave_0, values = (expand_dims_172, concat_255_values1_0, var_1233, concat_255_values3_0))[name = string("concat_255")]; tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_254, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_255, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = key_states_143, x = coreml_update_state_63)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_64")]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([68])]; tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([69])]; int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_258")]; tensor concat_259_values1_0 = const()[name = string("concat_259_values1_0"), val = tensor([0])]; tensor concat_259_values3_0 = const()[name = string("concat_259_values3_0"), val = tensor([0])]; int32 concat_259_axis_0 = const()[name = string("concat_259_axis_0"), val = int32(0)]; bool concat_259_interleave_0 = const()[name = string("concat_259_interleave_0"), val = bool(false)]; tensor concat_259 = concat(axis = concat_259_axis_0, interleave = concat_259_interleave_0, values = (expand_dims_178, concat_259_values1_0, var_1233, concat_259_values3_0))[name = string("concat_259")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_115 = transpose(perm = var_8276, x = var_8271)[name = string("transpose_32")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_258, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_259, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = value_states_115, x = coreml_update_state_64)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_65")]; tensor var_8450_begin_0 = const()[name = string("op_8450_begin_0"), val = tensor([32, 0, 0, 0])]; tensor var_8450_end_0 = const()[name = string("op_8450_end_0"), val = tensor([33, 8, 1024, 128])]; tensor var_8450_end_mask_0 = const()[name = string("op_8450_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8450_cast_fp16 = slice_by_index(begin = var_8450_begin_0, end = var_8450_end_0, end_mask = var_8450_end_mask_0, x = coreml_update_state_65)[name = string("op_8450_cast_fp16")]; tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_8450_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; tensor var_8457_begin_0 = const()[name = string("op_8457_begin_0"), val = tensor([68, 0, 0, 0])]; tensor var_8457_end_0 = const()[name = string("op_8457_end_0"), val = tensor([69, 8, 1024, 128])]; tensor var_8457_end_mask_0 = const()[name = string("op_8457_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8457_cast_fp16 = slice_by_index(begin = var_8457_begin_0, end = var_8457_end_0, end_mask = var_8457_end_mask_0, x = coreml_update_state_65)[name = string("op_8457_cast_fp16")]; tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_8457_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; tensor x_287_axes_0 = const()[name = string("x_287_axes_0"), val = tensor([1])]; tensor x_287_cast_fp16 = expand_dims(axes = x_287_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_287_cast_fp16")]; tensor var_8486 = const()[name = string("op_8486"), val = tensor([1, 4, 1, 1])]; tensor x_289_cast_fp16 = tile(reps = var_8486, x = x_287_cast_fp16)[name = string("x_289_cast_fp16")]; tensor var_8498 = const()[name = string("op_8498"), val = tensor([1, -1, 1024, 128])]; tensor key_states_147_cast_fp16 = reshape(shape = var_8498, x = x_289_cast_fp16)[name = string("key_states_147_cast_fp16")]; tensor x_293_axes_0 = const()[name = string("x_293_axes_0"), val = tensor([1])]; tensor x_293_cast_fp16 = expand_dims(axes = x_293_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_293_cast_fp16")]; tensor var_8506 = const()[name = string("op_8506"), val = tensor([1, 4, 1, 1])]; tensor x_295_cast_fp16 = tile(reps = var_8506, x = x_293_cast_fp16)[name = string("x_295_cast_fp16")]; bool var_8533_transpose_x_0 = const()[name = string("op_8533_transpose_x_0"), val = bool(false)]; bool var_8533_transpose_y_0 = const()[name = string("op_8533_transpose_y_0"), val = bool(true)]; tensor var_8533 = matmul(transpose_x = var_8533_transpose_x_0, transpose_y = var_8533_transpose_y_0, x = query_states_115, y = key_states_147_cast_fp16)[name = string("op_8533")]; fp16 var_8534_to_fp16 = const()[name = string("op_8534_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_57_cast_fp16 = mul(x = var_8533, y = var_8534_to_fp16)[name = string("attn_weights_57_cast_fp16")]; tensor attn_weights_59_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = causal_mask)[name = string("attn_weights_59_cast_fp16")]; int32 var_8569 = const()[name = string("op_8569"), val = int32(-1)]; tensor var_8571_cast_fp16 = softmax(axis = var_8569, x = attn_weights_59_cast_fp16)[name = string("op_8571_cast_fp16")]; tensor concat_264 = const()[name = string("concat_264"), val = tensor([32, 64, 1024])]; tensor reshape_42_cast_fp16 = reshape(shape = concat_264, x = var_8571_cast_fp16)[name = string("reshape_42_cast_fp16")]; tensor concat_265 = const()[name = string("concat_265"), val = tensor([32, 1024, 128])]; tensor reshape_43_cast_fp16 = reshape(shape = concat_265, x = x_295_cast_fp16)[name = string("reshape_43_cast_fp16")]; bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; tensor concat_269 = const()[name = string("concat_269"), val = tensor([1, 32, 64, 128])]; tensor reshape_44_cast_fp16 = reshape(shape = concat_269, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; tensor var_8583_perm_0 = const()[name = string("op_8583_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8602 = const()[name = string("op_8602"), val = tensor([1, 64, 4096])]; tensor var_8583_cast_fp16 = transpose(perm = var_8583_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_31")]; tensor attn_output_145_cast_fp16 = reshape(shape = var_8602, x = var_8583_cast_fp16)[name = string("attn_output_145_cast_fp16")]; tensor var_8607 = const()[name = string("op_8607"), val = tensor([0, 2, 1])]; string var_8623_pad_type_0 = const()[name = string("op_8623_pad_type_0"), val = string("valid")]; int32 var_8623_groups_0 = const()[name = string("op_8623_groups_0"), val = int32(1)]; tensor var_8623_strides_0 = const()[name = string("op_8623_strides_0"), val = tensor([1])]; tensor var_8623_pad_0 = const()[name = string("op_8623_pad_0"), val = tensor([0, 0])]; tensor var_8623_dilations_0 = const()[name = string("op_8623_dilations_0"), val = tensor([1])]; tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928501440))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8608_cast_fp16 = transpose(perm = var_8607, x = attn_output_145_cast_fp16)[name = string("transpose_30")]; tensor var_8623_cast_fp16 = conv(dilations = var_8623_dilations_0, groups = var_8623_groups_0, pad = var_8623_pad_0, pad_type = var_8623_pad_type_0, strides = var_8623_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_8608_cast_fp16)[name = string("op_8623_cast_fp16")]; tensor var_8627 = const()[name = string("op_8627"), val = tensor([0, 2, 1])]; tensor attn_output_149_cast_fp16 = transpose(perm = var_8627, x = var_8623_cast_fp16)[name = string("transpose_29")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor mean_119_axes_0 = const()[name = string("mean_119_axes_0"), val = tensor([-1])]; bool mean_119_keep_dims_0 = const()[name = string("mean_119_keep_dims_0"), val = bool(true)]; tensor mean_119_cast_fp16 = reduce_mean(axes = mean_119_axes_0, keep_dims = mean_119_keep_dims_0, x = hidden_states_89_cast_fp16)[name = string("mean_119_cast_fp16")]; tensor input_263_cast_fp16 = sub(x = hidden_states_89_cast_fp16, y = mean_119_cast_fp16)[name = string("input_263_cast_fp16")]; tensor var_8646_axes_0 = const()[name = string("op_8646_axes_0"), val = tensor([-1])]; tensor model_model_layers_32_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_32_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928511744)))]; fp16 var_8634_to_fp16 = const()[name = string("op_8634_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8646_cast_fp16 = layer_norm(axes = var_8646_axes_0, epsilon = var_8634_to_fp16, gamma = model_model_layers_32_post_attention_layernorm_weight_to_fp16, x = input_263_cast_fp16)[name = string("op_8646_cast_fp16")]; tensor var_8660 = const()[name = string("op_8660"), val = tensor([0, 2, 1])]; tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; tensor var_8661 = transpose(perm = var_8660, x = var_8646_cast_fp16)[name = string("transpose_28")]; tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_8661)[name = string("input_265")]; string input_267_pad_type_0 = const()[name = string("input_267_pad_type_0"), val = string("valid")]; tensor input_267_strides_0 = const()[name = string("input_267_strides_0"), val = tensor([1, 1])]; tensor input_267_pad_0 = const()[name = string("input_267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_267_dilations_0 = const()[name = string("input_267_dilations_0"), val = tensor([1, 1])]; int32 input_267_groups_0 = const()[name = string("input_267_groups_0"), val = int32(1)]; tensor input_267 = conv(dilations = input_267_dilations_0, groups = input_267_groups_0, pad = input_267_pad_0, pad_type = input_267_pad_type_0, strides = input_267_strides_0, weight = model_model_layers_32_mlp_gate_proj_weight_palettized, x = input_265)[name = string("input_267")]; string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_32_mlp_up_proj_weight_palettized, x = input_265)[name = string("b_29")]; tensor c_29 = silu(x = input_267)[name = string("c_29")]; tensor input_269 = mul(x = c_29, y = b_29)[name = string("input_269")]; string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; tensor e_29 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_32_mlp_down_proj_weight_palettized, x = input_269)[name = string("e_29")]; tensor var_8683_axes_0 = const()[name = string("op_8683_axes_0"), val = tensor([2])]; tensor var_8683 = squeeze(axes = var_8683_axes_0, x = e_29)[name = string("op_8683")]; tensor var_8684 = const()[name = string("op_8684"), val = tensor([0, 2, 1])]; tensor var_8685 = transpose(perm = var_8684, x = var_8683)[name = string("transpose_27")]; tensor hidden_states_91_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = var_8685)[name = string("hidden_states_91_cast_fp16")]; tensor mean_121_axes_0 = const()[name = string("mean_121_axes_0"), val = tensor([-1])]; bool mean_121_keep_dims_0 = const()[name = string("mean_121_keep_dims_0"), val = bool(true)]; tensor mean_121_cast_fp16 = reduce_mean(axes = mean_121_axes_0, keep_dims = mean_121_keep_dims_0, x = hidden_states_91_cast_fp16)[name = string("mean_121_cast_fp16")]; tensor input_271_cast_fp16 = sub(x = hidden_states_91_cast_fp16, y = mean_121_cast_fp16)[name = string("input_271_cast_fp16")]; tensor var_8703_axes_0 = const()[name = string("op_8703_axes_0"), val = tensor([-1])]; tensor model_model_layers_33_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_33_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928516928)))]; fp16 var_8691_to_fp16 = const()[name = string("op_8691_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8703_cast_fp16 = layer_norm(axes = var_8703_axes_0, epsilon = var_8691_to_fp16, gamma = model_model_layers_33_input_layernorm_weight_to_fp16, x = input_271_cast_fp16)[name = string("op_8703_cast_fp16")]; tensor var_8715 = const()[name = string("op_8715"), val = tensor([0, 2, 1])]; tensor var_8718_axes_0 = const()[name = string("op_8718_axes_0"), val = tensor([2])]; tensor var_8716 = transpose(perm = var_8715, x = var_8703_cast_fp16)[name = string("transpose_26")]; tensor var_8718 = expand_dims(axes = var_8718_axes_0, x = var_8716)[name = string("op_8718")]; string query_states_121_pad_type_0 = const()[name = string("query_states_121_pad_type_0"), val = string("valid")]; tensor query_states_121_strides_0 = const()[name = string("query_states_121_strides_0"), val = tensor([1, 1])]; tensor query_states_121_pad_0 = const()[name = string("query_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_121_dilations_0 = const()[name = string("query_states_121_dilations_0"), val = tensor([1, 1])]; int32 query_states_121_groups_0 = const()[name = string("query_states_121_groups_0"), val = int32(1)]; tensor query_states_121 = conv(dilations = query_states_121_dilations_0, groups = query_states_121_groups_0, pad = query_states_121_pad_0, pad_type = query_states_121_pad_type_0, strides = query_states_121_strides_0, weight = model_model_layers_33_self_attn_q_proj_weight_palettized, x = var_8718)[name = string("query_states_121")]; string key_states_151_pad_type_0 = const()[name = string("key_states_151_pad_type_0"), val = string("valid")]; tensor key_states_151_strides_0 = const()[name = string("key_states_151_strides_0"), val = tensor([1, 1])]; tensor key_states_151_pad_0 = const()[name = string("key_states_151_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_151_dilations_0 = const()[name = string("key_states_151_dilations_0"), val = tensor([1, 1])]; int32 key_states_151_groups_0 = const()[name = string("key_states_151_groups_0"), val = int32(1)]; tensor key_states_151 = conv(dilations = key_states_151_dilations_0, groups = key_states_151_groups_0, pad = key_states_151_pad_0, pad_type = key_states_151_pad_type_0, strides = key_states_151_strides_0, weight = model_model_layers_33_self_attn_k_proj_weight_palettized, x = var_8718)[name = string("key_states_151")]; string value_states_121_pad_type_0 = const()[name = string("value_states_121_pad_type_0"), val = string("valid")]; tensor value_states_121_strides_0 = const()[name = string("value_states_121_strides_0"), val = tensor([1, 1])]; tensor value_states_121_pad_0 = const()[name = string("value_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_121_dilations_0 = const()[name = string("value_states_121_dilations_0"), val = tensor([1, 1])]; int32 value_states_121_groups_0 = const()[name = string("value_states_121_groups_0"), val = int32(1)]; tensor value_states_121 = conv(dilations = value_states_121_dilations_0, groups = value_states_121_groups_0, pad = value_states_121_pad_0, pad_type = value_states_121_pad_type_0, strides = value_states_121_strides_0, weight = model_model_layers_33_self_attn_v_proj_weight_palettized, x = var_8718)[name = string("value_states_121")]; tensor var_8760 = const()[name = string("op_8760"), val = tensor([1, 32, 128, 64])]; tensor var_8761 = reshape(shape = var_8760, x = query_states_121)[name = string("op_8761")]; tensor var_8766 = const()[name = string("op_8766"), val = tensor([0, 1, 3, 2])]; tensor var_8771 = const()[name = string("op_8771"), val = tensor([1, 8, 128, 64])]; tensor var_8772 = reshape(shape = var_8771, x = key_states_151)[name = string("op_8772")]; tensor var_8777 = const()[name = string("op_8777"), val = tensor([0, 1, 3, 2])]; tensor var_8782 = const()[name = string("op_8782"), val = tensor([1, 8, 128, 64])]; tensor var_8783 = reshape(shape = var_8782, x = value_states_121)[name = string("op_8783")]; tensor var_8788 = const()[name = string("op_8788"), val = tensor([0, 1, 3, 2])]; tensor mean_123_axes_0 = const()[name = string("mean_123_axes_0"), val = tensor([-1])]; bool mean_123_keep_dims_0 = const()[name = string("mean_123_keep_dims_0"), val = bool(true)]; tensor x_301 = transpose(perm = var_8766, x = var_8761)[name = string("transpose_25")]; tensor mean_123 = reduce_mean(axes = mean_123_axes_0, keep_dims = mean_123_keep_dims_0, x = x_301)[name = string("mean_123")]; tensor input_275 = sub(x = x_301, y = mean_123)[name = string("input_275")]; tensor var_8805_axes_0 = const()[name = string("op_8805_axes_0"), val = tensor([-1])]; tensor model_model_layers_33_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_33_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522112)))]; fp16 var_8793_to_fp16 = const()[name = string("op_8793_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8805_cast_fp16 = layer_norm(axes = var_8805_axes_0, epsilon = var_8793_to_fp16, gamma = model_model_layers_33_self_attn_q_norm_weight_to_fp16, x = input_275)[name = string("op_8805_cast_fp16")]; tensor mean_125_axes_0 = const()[name = string("mean_125_axes_0"), val = tensor([-1])]; bool mean_125_keep_dims_0 = const()[name = string("mean_125_keep_dims_0"), val = bool(true)]; tensor x_303 = transpose(perm = var_8777, x = var_8772)[name = string("transpose_24")]; tensor mean_125 = reduce_mean(axes = mean_125_axes_0, keep_dims = mean_125_keep_dims_0, x = x_303)[name = string("mean_125")]; tensor input_277 = sub(x = x_303, y = mean_125)[name = string("input_277")]; tensor var_8823_axes_0 = const()[name = string("op_8823_axes_0"), val = tensor([-1])]; tensor model_model_layers_33_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_33_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522432)))]; fp16 var_8811_to_fp16 = const()[name = string("op_8811_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8823_cast_fp16 = layer_norm(axes = var_8823_axes_0, epsilon = var_8811_to_fp16, gamma = model_model_layers_33_self_attn_k_norm_weight_to_fp16, x = input_277)[name = string("op_8823_cast_fp16")]; tensor var_8838 = mul(x = var_8805_cast_fp16, y = cos_5)[name = string("op_8838")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_8805_cast_fp16)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_8805_cast_fp16)[name = string("x2_61")]; fp16 const_337_promoted = const()[name = string("const_337_promoted"), val = fp16(-0x1p+0)]; tensor var_8859 = mul(x = x2_61, y = const_337_promoted)[name = string("op_8859")]; int32 var_8861 = const()[name = string("op_8861"), val = int32(-1)]; bool var_8862_interleave_0 = const()[name = string("op_8862_interleave_0"), val = bool(false)]; tensor var_8862 = concat(axis = var_8861, interleave = var_8862_interleave_0, values = (var_8859, x1_61))[name = string("op_8862")]; tensor var_8863 = mul(x = var_8862, y = sin_5)[name = string("op_8863")]; tensor query_states_123 = add(x = var_8838, y = var_8863)[name = string("query_states_123")]; tensor var_8866 = mul(x = var_8823_cast_fp16, y = cos_5)[name = string("op_8866")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = var_8823_cast_fp16)[name = string("x1_63")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = var_8823_cast_fp16)[name = string("x2_63")]; fp16 const_340_promoted = const()[name = string("const_340_promoted"), val = fp16(-0x1p+0)]; tensor var_8887 = mul(x = x2_63, y = const_340_promoted)[name = string("op_8887")]; int32 var_8889 = const()[name = string("op_8889"), val = int32(-1)]; bool var_8890_interleave_0 = const()[name = string("op_8890_interleave_0"), val = bool(false)]; tensor var_8890 = concat(axis = var_8889, interleave = var_8890_interleave_0, values = (var_8887, x1_63))[name = string("op_8890")]; tensor var_8891 = mul(x = var_8890, y = sin_5)[name = string("op_8891")]; tensor key_states_153 = add(x = var_8866, y = var_8891)[name = string("key_states_153")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([33])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([34])]; int32 concat_272_axis_0 = const()[name = string("concat_272_axis_0"), val = int32(0)]; bool concat_272_interleave_0 = const()[name = string("concat_272_interleave_0"), val = bool(false)]; tensor concat_272 = concat(axis = concat_272_axis_0, interleave = concat_272_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_272")]; tensor concat_273_values1_0 = const()[name = string("concat_273_values1_0"), val = tensor([0])]; tensor concat_273_values3_0 = const()[name = string("concat_273_values3_0"), val = tensor([0])]; int32 concat_273_axis_0 = const()[name = string("concat_273_axis_0"), val = int32(0)]; bool concat_273_interleave_0 = const()[name = string("concat_273_interleave_0"), val = bool(false)]; tensor concat_273 = concat(axis = concat_273_axis_0, interleave = concat_273_interleave_0, values = (expand_dims_184, concat_273_values1_0, var_1233, concat_273_values3_0))[name = string("concat_273")]; tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_272, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_273, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = key_states_153, x = coreml_update_state_65)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_66")]; tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([69])]; tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([70])]; int32 concat_276_axis_0 = const()[name = string("concat_276_axis_0"), val = int32(0)]; bool concat_276_interleave_0 = const()[name = string("concat_276_interleave_0"), val = bool(false)]; tensor concat_276 = concat(axis = concat_276_axis_0, interleave = concat_276_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_276")]; tensor concat_277_values1_0 = const()[name = string("concat_277_values1_0"), val = tensor([0])]; tensor concat_277_values3_0 = const()[name = string("concat_277_values3_0"), val = tensor([0])]; int32 concat_277_axis_0 = const()[name = string("concat_277_axis_0"), val = int32(0)]; bool concat_277_interleave_0 = const()[name = string("concat_277_interleave_0"), val = bool(false)]; tensor concat_277 = concat(axis = concat_277_axis_0, interleave = concat_277_interleave_0, values = (expand_dims_190, concat_277_values1_0, var_1233, concat_277_values3_0))[name = string("concat_277")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_123 = transpose(perm = var_8788, x = var_8783)[name = string("transpose_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_276, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_277, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = value_states_123, x = coreml_update_state_66)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_67")]; tensor var_8962_begin_0 = const()[name = string("op_8962_begin_0"), val = tensor([33, 0, 0, 0])]; tensor var_8962_end_0 = const()[name = string("op_8962_end_0"), val = tensor([34, 8, 1024, 128])]; tensor var_8962_end_mask_0 = const()[name = string("op_8962_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8962_cast_fp16 = slice_by_index(begin = var_8962_begin_0, end = var_8962_end_0, end_mask = var_8962_end_mask_0, x = coreml_update_state_67)[name = string("op_8962_cast_fp16")]; tensor K_layer_cache_31_axes_0 = const()[name = string("K_layer_cache_31_axes_0"), val = tensor([0])]; tensor K_layer_cache_31_cast_fp16 = squeeze(axes = K_layer_cache_31_axes_0, x = var_8962_cast_fp16)[name = string("K_layer_cache_31_cast_fp16")]; tensor var_8969_begin_0 = const()[name = string("op_8969_begin_0"), val = tensor([69, 0, 0, 0])]; tensor var_8969_end_0 = const()[name = string("op_8969_end_0"), val = tensor([70, 8, 1024, 128])]; tensor var_8969_end_mask_0 = const()[name = string("op_8969_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8969_cast_fp16 = slice_by_index(begin = var_8969_begin_0, end = var_8969_end_0, end_mask = var_8969_end_mask_0, x = coreml_update_state_67)[name = string("op_8969_cast_fp16")]; tensor V_layer_cache_31_axes_0 = const()[name = string("V_layer_cache_31_axes_0"), val = tensor([0])]; tensor V_layer_cache_31_cast_fp16 = squeeze(axes = V_layer_cache_31_axes_0, x = var_8969_cast_fp16)[name = string("V_layer_cache_31_cast_fp16")]; tensor x_307_axes_0 = const()[name = string("x_307_axes_0"), val = tensor([1])]; tensor x_307_cast_fp16 = expand_dims(axes = x_307_axes_0, x = K_layer_cache_31_cast_fp16)[name = string("x_307_cast_fp16")]; tensor var_8998 = const()[name = string("op_8998"), val = tensor([1, 4, 1, 1])]; tensor x_309_cast_fp16 = tile(reps = var_8998, x = x_307_cast_fp16)[name = string("x_309_cast_fp16")]; tensor var_9010 = const()[name = string("op_9010"), val = tensor([1, -1, 1024, 128])]; tensor key_states_157_cast_fp16 = reshape(shape = var_9010, x = x_309_cast_fp16)[name = string("key_states_157_cast_fp16")]; tensor x_313_axes_0 = const()[name = string("x_313_axes_0"), val = tensor([1])]; tensor x_313_cast_fp16 = expand_dims(axes = x_313_axes_0, x = V_layer_cache_31_cast_fp16)[name = string("x_313_cast_fp16")]; tensor var_9018 = const()[name = string("op_9018"), val = tensor([1, 4, 1, 1])]; tensor x_315_cast_fp16 = tile(reps = var_9018, x = x_313_cast_fp16)[name = string("x_315_cast_fp16")]; bool var_9045_transpose_x_0 = const()[name = string("op_9045_transpose_x_0"), val = bool(false)]; bool var_9045_transpose_y_0 = const()[name = string("op_9045_transpose_y_0"), val = bool(true)]; tensor var_9045 = matmul(transpose_x = var_9045_transpose_x_0, transpose_y = var_9045_transpose_y_0, x = query_states_123, y = key_states_157_cast_fp16)[name = string("op_9045")]; fp16 var_9046_to_fp16 = const()[name = string("op_9046_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_9045, y = var_9046_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_9081 = const()[name = string("op_9081"), val = int32(-1)]; tensor var_9083_cast_fp16 = softmax(axis = var_9081, x = attn_weights_63_cast_fp16)[name = string("op_9083_cast_fp16")]; tensor concat_282 = const()[name = string("concat_282"), val = tensor([32, 64, 1024])]; tensor reshape_45_cast_fp16 = reshape(shape = concat_282, x = var_9083_cast_fp16)[name = string("reshape_45_cast_fp16")]; tensor concat_283 = const()[name = string("concat_283"), val = tensor([32, 1024, 128])]; tensor reshape_46_cast_fp16 = reshape(shape = concat_283, x = x_315_cast_fp16)[name = string("reshape_46_cast_fp16")]; bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; tensor concat_287 = const()[name = string("concat_287"), val = tensor([1, 32, 64, 128])]; tensor reshape_47_cast_fp16 = reshape(shape = concat_287, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; tensor var_9095_perm_0 = const()[name = string("op_9095_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9114 = const()[name = string("op_9114"), val = tensor([1, 64, 4096])]; tensor var_9095_cast_fp16 = transpose(perm = var_9095_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_22")]; tensor attn_output_155_cast_fp16 = reshape(shape = var_9114, x = var_9095_cast_fp16)[name = string("attn_output_155_cast_fp16")]; tensor var_9119 = const()[name = string("op_9119"), val = tensor([0, 2, 1])]; string var_9135_pad_type_0 = const()[name = string("op_9135_pad_type_0"), val = string("valid")]; int32 var_9135_groups_0 = const()[name = string("op_9135_groups_0"), val = int32(1)]; tensor var_9135_strides_0 = const()[name = string("op_9135_strides_0"), val = tensor([1])]; tensor var_9135_pad_0 = const()[name = string("op_9135_pad_0"), val = tensor([0, 0])]; tensor var_9135_dilations_0 = const()[name = string("op_9135_dilations_0"), val = tensor([1])]; tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928522752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933765696))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9120_cast_fp16 = transpose(perm = var_9119, x = attn_output_155_cast_fp16)[name = string("transpose_21")]; tensor var_9135_cast_fp16 = conv(dilations = var_9135_dilations_0, groups = var_9135_groups_0, pad = var_9135_pad_0, pad_type = var_9135_pad_type_0, strides = var_9135_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_9120_cast_fp16)[name = string("op_9135_cast_fp16")]; tensor var_9139 = const()[name = string("op_9139"), val = tensor([0, 2, 1])]; tensor attn_output_159_cast_fp16 = transpose(perm = var_9139, x = var_9135_cast_fp16)[name = string("transpose_20")]; tensor hidden_states_95_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor mean_127_axes_0 = const()[name = string("mean_127_axes_0"), val = tensor([-1])]; bool mean_127_keep_dims_0 = const()[name = string("mean_127_keep_dims_0"), val = bool(true)]; tensor mean_127_cast_fp16 = reduce_mean(axes = mean_127_axes_0, keep_dims = mean_127_keep_dims_0, x = hidden_states_95_cast_fp16)[name = string("mean_127_cast_fp16")]; tensor input_281_cast_fp16 = sub(x = hidden_states_95_cast_fp16, y = mean_127_cast_fp16)[name = string("input_281_cast_fp16")]; tensor var_9158_axes_0 = const()[name = string("op_9158_axes_0"), val = tensor([-1])]; tensor model_model_layers_33_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_33_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933776000)))]; fp16 var_9146_to_fp16 = const()[name = string("op_9146_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9158_cast_fp16 = layer_norm(axes = var_9158_axes_0, epsilon = var_9146_to_fp16, gamma = model_model_layers_33_post_attention_layernorm_weight_to_fp16, x = input_281_cast_fp16)[name = string("op_9158_cast_fp16")]; tensor var_9172 = const()[name = string("op_9172"), val = tensor([0, 2, 1])]; tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; tensor var_9173 = transpose(perm = var_9172, x = var_9158_cast_fp16)[name = string("transpose_19")]; tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_9173)[name = string("input_283")]; string input_285_pad_type_0 = const()[name = string("input_285_pad_type_0"), val = string("valid")]; tensor input_285_strides_0 = const()[name = string("input_285_strides_0"), val = tensor([1, 1])]; tensor input_285_pad_0 = const()[name = string("input_285_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_285_dilations_0 = const()[name = string("input_285_dilations_0"), val = tensor([1, 1])]; int32 input_285_groups_0 = const()[name = string("input_285_groups_0"), val = int32(1)]; tensor input_285 = conv(dilations = input_285_dilations_0, groups = input_285_groups_0, pad = input_285_pad_0, pad_type = input_285_pad_type_0, strides = input_285_strides_0, weight = model_model_layers_33_mlp_gate_proj_weight_palettized, x = input_283)[name = string("input_285")]; string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; tensor b_31 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_33_mlp_up_proj_weight_palettized, x = input_283)[name = string("b_31")]; tensor c_31 = silu(x = input_285)[name = string("c_31")]; tensor input_287 = mul(x = c_31, y = b_31)[name = string("input_287")]; string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; tensor e_31 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_33_mlp_down_proj_weight_palettized, x = input_287)[name = string("e_31")]; tensor var_9195_axes_0 = const()[name = string("op_9195_axes_0"), val = tensor([2])]; tensor var_9195 = squeeze(axes = var_9195_axes_0, x = e_31)[name = string("op_9195")]; tensor var_9196 = const()[name = string("op_9196"), val = tensor([0, 2, 1])]; tensor var_9197 = transpose(perm = var_9196, x = var_9195)[name = string("transpose_18")]; tensor hidden_states_97_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = var_9197)[name = string("hidden_states_97_cast_fp16")]; tensor mean_129_axes_0 = const()[name = string("mean_129_axes_0"), val = tensor([-1])]; bool mean_129_keep_dims_0 = const()[name = string("mean_129_keep_dims_0"), val = bool(true)]; tensor mean_129_cast_fp16 = reduce_mean(axes = mean_129_axes_0, keep_dims = mean_129_keep_dims_0, x = hidden_states_97_cast_fp16)[name = string("mean_129_cast_fp16")]; tensor input_289_cast_fp16 = sub(x = hidden_states_97_cast_fp16, y = mean_129_cast_fp16)[name = string("input_289_cast_fp16")]; tensor var_9215_axes_0 = const()[name = string("op_9215_axes_0"), val = tensor([-1])]; tensor model_model_layers_34_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_34_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933781184)))]; fp16 var_9203_to_fp16 = const()[name = string("op_9203_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9215_cast_fp16 = layer_norm(axes = var_9215_axes_0, epsilon = var_9203_to_fp16, gamma = model_model_layers_34_input_layernorm_weight_to_fp16, x = input_289_cast_fp16)[name = string("op_9215_cast_fp16")]; tensor var_9227 = const()[name = string("op_9227"), val = tensor([0, 2, 1])]; tensor var_9230_axes_0 = const()[name = string("op_9230_axes_0"), val = tensor([2])]; tensor var_9228 = transpose(perm = var_9227, x = var_9215_cast_fp16)[name = string("transpose_17")]; tensor var_9230 = expand_dims(axes = var_9230_axes_0, x = var_9228)[name = string("op_9230")]; string query_states_129_pad_type_0 = const()[name = string("query_states_129_pad_type_0"), val = string("valid")]; tensor query_states_129_strides_0 = const()[name = string("query_states_129_strides_0"), val = tensor([1, 1])]; tensor query_states_129_pad_0 = const()[name = string("query_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_129_dilations_0 = const()[name = string("query_states_129_dilations_0"), val = tensor([1, 1])]; int32 query_states_129_groups_0 = const()[name = string("query_states_129_groups_0"), val = int32(1)]; tensor query_states_129 = conv(dilations = query_states_129_dilations_0, groups = query_states_129_groups_0, pad = query_states_129_pad_0, pad_type = query_states_129_pad_type_0, strides = query_states_129_strides_0, weight = model_model_layers_34_self_attn_q_proj_weight_palettized, x = var_9230)[name = string("query_states_129")]; string key_states_161_pad_type_0 = const()[name = string("key_states_161_pad_type_0"), val = string("valid")]; tensor key_states_161_strides_0 = const()[name = string("key_states_161_strides_0"), val = tensor([1, 1])]; tensor key_states_161_pad_0 = const()[name = string("key_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_161_dilations_0 = const()[name = string("key_states_161_dilations_0"), val = tensor([1, 1])]; int32 key_states_161_groups_0 = const()[name = string("key_states_161_groups_0"), val = int32(1)]; tensor key_states_161 = conv(dilations = key_states_161_dilations_0, groups = key_states_161_groups_0, pad = key_states_161_pad_0, pad_type = key_states_161_pad_type_0, strides = key_states_161_strides_0, weight = model_model_layers_34_self_attn_k_proj_weight_palettized, x = var_9230)[name = string("key_states_161")]; string value_states_129_pad_type_0 = const()[name = string("value_states_129_pad_type_0"), val = string("valid")]; tensor value_states_129_strides_0 = const()[name = string("value_states_129_strides_0"), val = tensor([1, 1])]; tensor value_states_129_pad_0 = const()[name = string("value_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_129_dilations_0 = const()[name = string("value_states_129_dilations_0"), val = tensor([1, 1])]; int32 value_states_129_groups_0 = const()[name = string("value_states_129_groups_0"), val = int32(1)]; tensor value_states_129 = conv(dilations = value_states_129_dilations_0, groups = value_states_129_groups_0, pad = value_states_129_pad_0, pad_type = value_states_129_pad_type_0, strides = value_states_129_strides_0, weight = model_model_layers_34_self_attn_v_proj_weight_palettized, x = var_9230)[name = string("value_states_129")]; tensor var_9272 = const()[name = string("op_9272"), val = tensor([1, 32, 128, 64])]; tensor var_9273 = reshape(shape = var_9272, x = query_states_129)[name = string("op_9273")]; tensor var_9278 = const()[name = string("op_9278"), val = tensor([0, 1, 3, 2])]; tensor var_9283 = const()[name = string("op_9283"), val = tensor([1, 8, 128, 64])]; tensor var_9284 = reshape(shape = var_9283, x = key_states_161)[name = string("op_9284")]; tensor var_9289 = const()[name = string("op_9289"), val = tensor([0, 1, 3, 2])]; tensor var_9294 = const()[name = string("op_9294"), val = tensor([1, 8, 128, 64])]; tensor var_9295 = reshape(shape = var_9294, x = value_states_129)[name = string("op_9295")]; tensor var_9300 = const()[name = string("op_9300"), val = tensor([0, 1, 3, 2])]; tensor mean_131_axes_0 = const()[name = string("mean_131_axes_0"), val = tensor([-1])]; bool mean_131_keep_dims_0 = const()[name = string("mean_131_keep_dims_0"), val = bool(true)]; tensor x_321 = transpose(perm = var_9278, x = var_9273)[name = string("transpose_16")]; tensor mean_131 = reduce_mean(axes = mean_131_axes_0, keep_dims = mean_131_keep_dims_0, x = x_321)[name = string("mean_131")]; tensor input_293 = sub(x = x_321, y = mean_131)[name = string("input_293")]; tensor var_9317_axes_0 = const()[name = string("op_9317_axes_0"), val = tensor([-1])]; tensor model_model_layers_34_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_34_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933786368)))]; fp16 var_9305_to_fp16 = const()[name = string("op_9305_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9317_cast_fp16 = layer_norm(axes = var_9317_axes_0, epsilon = var_9305_to_fp16, gamma = model_model_layers_34_self_attn_q_norm_weight_to_fp16, x = input_293)[name = string("op_9317_cast_fp16")]; tensor mean_133_axes_0 = const()[name = string("mean_133_axes_0"), val = tensor([-1])]; bool mean_133_keep_dims_0 = const()[name = string("mean_133_keep_dims_0"), val = bool(true)]; tensor x_323 = transpose(perm = var_9289, x = var_9284)[name = string("transpose_15")]; tensor mean_133 = reduce_mean(axes = mean_133_axes_0, keep_dims = mean_133_keep_dims_0, x = x_323)[name = string("mean_133")]; tensor input_295 = sub(x = x_323, y = mean_133)[name = string("input_295")]; tensor var_9335_axes_0 = const()[name = string("op_9335_axes_0"), val = tensor([-1])]; tensor model_model_layers_34_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_34_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933786688)))]; fp16 var_9323_to_fp16 = const()[name = string("op_9323_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9335_cast_fp16 = layer_norm(axes = var_9335_axes_0, epsilon = var_9323_to_fp16, gamma = model_model_layers_34_self_attn_k_norm_weight_to_fp16, x = input_295)[name = string("op_9335_cast_fp16")]; tensor var_9350 = mul(x = var_9317_cast_fp16, y = cos_5)[name = string("op_9350")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = var_9317_cast_fp16)[name = string("x1_65")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = var_9317_cast_fp16)[name = string("x2_65")]; fp16 const_359_promoted = const()[name = string("const_359_promoted"), val = fp16(-0x1p+0)]; tensor var_9371 = mul(x = x2_65, y = const_359_promoted)[name = string("op_9371")]; int32 var_9373 = const()[name = string("op_9373"), val = int32(-1)]; bool var_9374_interleave_0 = const()[name = string("op_9374_interleave_0"), val = bool(false)]; tensor var_9374 = concat(axis = var_9373, interleave = var_9374_interleave_0, values = (var_9371, x1_65))[name = string("op_9374")]; tensor var_9375 = mul(x = var_9374, y = sin_5)[name = string("op_9375")]; tensor query_states_131 = add(x = var_9350, y = var_9375)[name = string("query_states_131")]; tensor var_9378 = mul(x = var_9335_cast_fp16, y = cos_5)[name = string("op_9378")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = var_9335_cast_fp16)[name = string("x1_67")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = var_9335_cast_fp16)[name = string("x2_67")]; fp16 const_362_promoted = const()[name = string("const_362_promoted"), val = fp16(-0x1p+0)]; tensor var_9399 = mul(x = x2_67, y = const_362_promoted)[name = string("op_9399")]; int32 var_9401 = const()[name = string("op_9401"), val = int32(-1)]; bool var_9402_interleave_0 = const()[name = string("op_9402_interleave_0"), val = bool(false)]; tensor var_9402 = concat(axis = var_9401, interleave = var_9402_interleave_0, values = (var_9399, x1_67))[name = string("op_9402")]; tensor var_9403 = mul(x = var_9402, y = sin_5)[name = string("op_9403")]; tensor key_states_163 = add(x = var_9378, y = var_9403)[name = string("key_states_163")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([34])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([35])]; int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_290")]; tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (expand_dims_196, concat_291_values1_0, var_1233, concat_291_values3_0))[name = string("concat_291")]; tensor model_model_kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_290, begin_mask = model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_291, end_mask = model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_33_stride_0, update = key_states_163, x = coreml_update_state_67)[name = string("model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_68")]; tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([70])]; tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([71])]; int32 concat_294_axis_0 = const()[name = string("concat_294_axis_0"), val = int32(0)]; bool concat_294_interleave_0 = const()[name = string("concat_294_interleave_0"), val = bool(false)]; tensor concat_294 = concat(axis = concat_294_axis_0, interleave = concat_294_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_294")]; tensor concat_295_values1_0 = const()[name = string("concat_295_values1_0"), val = tensor([0])]; tensor concat_295_values3_0 = const()[name = string("concat_295_values3_0"), val = tensor([0])]; int32 concat_295_axis_0 = const()[name = string("concat_295_axis_0"), val = int32(0)]; bool concat_295_interleave_0 = const()[name = string("concat_295_interleave_0"), val = bool(false)]; tensor concat_295 = concat(axis = concat_295_axis_0, interleave = concat_295_interleave_0, values = (expand_dims_202, concat_295_values1_0, var_1233, concat_295_values3_0))[name = string("concat_295")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_131 = transpose(perm = var_9300, x = var_9295)[name = string("transpose_14")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_294, begin_mask = model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_295, end_mask = model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_34_stride_0, update = value_states_131, x = coreml_update_state_68)[name = string("model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_69")]; tensor var_9474_begin_0 = const()[name = string("op_9474_begin_0"), val = tensor([34, 0, 0, 0])]; tensor var_9474_end_0 = const()[name = string("op_9474_end_0"), val = tensor([35, 8, 1024, 128])]; tensor var_9474_end_mask_0 = const()[name = string("op_9474_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9474_cast_fp16 = slice_by_index(begin = var_9474_begin_0, end = var_9474_end_0, end_mask = var_9474_end_mask_0, x = coreml_update_state_69)[name = string("op_9474_cast_fp16")]; tensor K_layer_cache_33_axes_0 = const()[name = string("K_layer_cache_33_axes_0"), val = tensor([0])]; tensor K_layer_cache_33_cast_fp16 = squeeze(axes = K_layer_cache_33_axes_0, x = var_9474_cast_fp16)[name = string("K_layer_cache_33_cast_fp16")]; tensor var_9481_begin_0 = const()[name = string("op_9481_begin_0"), val = tensor([70, 0, 0, 0])]; tensor var_9481_end_0 = const()[name = string("op_9481_end_0"), val = tensor([71, 8, 1024, 128])]; tensor var_9481_end_mask_0 = const()[name = string("op_9481_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9481_cast_fp16 = slice_by_index(begin = var_9481_begin_0, end = var_9481_end_0, end_mask = var_9481_end_mask_0, x = coreml_update_state_69)[name = string("op_9481_cast_fp16")]; tensor V_layer_cache_33_axes_0 = const()[name = string("V_layer_cache_33_axes_0"), val = tensor([0])]; tensor V_layer_cache_33_cast_fp16 = squeeze(axes = V_layer_cache_33_axes_0, x = var_9481_cast_fp16)[name = string("V_layer_cache_33_cast_fp16")]; tensor x_327_axes_0 = const()[name = string("x_327_axes_0"), val = tensor([1])]; tensor x_327_cast_fp16 = expand_dims(axes = x_327_axes_0, x = K_layer_cache_33_cast_fp16)[name = string("x_327_cast_fp16")]; tensor var_9510 = const()[name = string("op_9510"), val = tensor([1, 4, 1, 1])]; tensor x_329_cast_fp16 = tile(reps = var_9510, x = x_327_cast_fp16)[name = string("x_329_cast_fp16")]; tensor var_9522 = const()[name = string("op_9522"), val = tensor([1, -1, 1024, 128])]; tensor key_states_167_cast_fp16 = reshape(shape = var_9522, x = x_329_cast_fp16)[name = string("key_states_167_cast_fp16")]; tensor x_333_axes_0 = const()[name = string("x_333_axes_0"), val = tensor([1])]; tensor x_333_cast_fp16 = expand_dims(axes = x_333_axes_0, x = V_layer_cache_33_cast_fp16)[name = string("x_333_cast_fp16")]; tensor var_9530 = const()[name = string("op_9530"), val = tensor([1, 4, 1, 1])]; tensor x_335_cast_fp16 = tile(reps = var_9530, x = x_333_cast_fp16)[name = string("x_335_cast_fp16")]; bool var_9557_transpose_x_0 = const()[name = string("op_9557_transpose_x_0"), val = bool(false)]; bool var_9557_transpose_y_0 = const()[name = string("op_9557_transpose_y_0"), val = bool(true)]; tensor var_9557 = matmul(transpose_x = var_9557_transpose_x_0, transpose_y = var_9557_transpose_y_0, x = query_states_131, y = key_states_167_cast_fp16)[name = string("op_9557")]; fp16 var_9558_to_fp16 = const()[name = string("op_9558_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_65_cast_fp16 = mul(x = var_9557, y = var_9558_to_fp16)[name = string("attn_weights_65_cast_fp16")]; tensor attn_weights_67_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = causal_mask)[name = string("attn_weights_67_cast_fp16")]; int32 var_9593 = const()[name = string("op_9593"), val = int32(-1)]; tensor var_9595_cast_fp16 = softmax(axis = var_9593, x = attn_weights_67_cast_fp16)[name = string("op_9595_cast_fp16")]; tensor concat_300 = const()[name = string("concat_300"), val = tensor([32, 64, 1024])]; tensor reshape_48_cast_fp16 = reshape(shape = concat_300, x = var_9595_cast_fp16)[name = string("reshape_48_cast_fp16")]; tensor concat_301 = const()[name = string("concat_301"), val = tensor([32, 1024, 128])]; tensor reshape_49_cast_fp16 = reshape(shape = concat_301, x = x_335_cast_fp16)[name = string("reshape_49_cast_fp16")]; bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(false)]; tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = reshape_48_cast_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; tensor concat_305 = const()[name = string("concat_305"), val = tensor([1, 32, 64, 128])]; tensor reshape_50_cast_fp16 = reshape(shape = concat_305, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; tensor var_9607_perm_0 = const()[name = string("op_9607_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9626 = const()[name = string("op_9626"), val = tensor([1, 64, 4096])]; tensor var_9607_cast_fp16 = transpose(perm = var_9607_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_13")]; tensor attn_output_165_cast_fp16 = reshape(shape = var_9626, x = var_9607_cast_fp16)[name = string("attn_output_165_cast_fp16")]; tensor var_9631 = const()[name = string("op_9631"), val = tensor([0, 2, 1])]; string var_9647_pad_type_0 = const()[name = string("op_9647_pad_type_0"), val = string("valid")]; int32 var_9647_groups_0 = const()[name = string("op_9647_groups_0"), val = int32(1)]; tensor var_9647_strides_0 = const()[name = string("op_9647_strides_0"), val = tensor([1])]; tensor var_9647_pad_0 = const()[name = string("op_9647_pad_0"), val = tensor([0, 0])]; tensor var_9647_dilations_0 = const()[name = string("op_9647_dilations_0"), val = tensor([1])]; tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933787008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939029952))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9632_cast_fp16 = transpose(perm = var_9631, x = attn_output_165_cast_fp16)[name = string("transpose_12")]; tensor var_9647_cast_fp16 = conv(dilations = var_9647_dilations_0, groups = var_9647_groups_0, pad = var_9647_pad_0, pad_type = var_9647_pad_type_0, strides = var_9647_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_9632_cast_fp16)[name = string("op_9647_cast_fp16")]; tensor var_9651 = const()[name = string("op_9651"), val = tensor([0, 2, 1])]; tensor attn_output_169_cast_fp16 = transpose(perm = var_9651, x = var_9647_cast_fp16)[name = string("transpose_11")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor mean_135_axes_0 = const()[name = string("mean_135_axes_0"), val = tensor([-1])]; bool mean_135_keep_dims_0 = const()[name = string("mean_135_keep_dims_0"), val = bool(true)]; tensor mean_135_cast_fp16 = reduce_mean(axes = mean_135_axes_0, keep_dims = mean_135_keep_dims_0, x = hidden_states_101_cast_fp16)[name = string("mean_135_cast_fp16")]; tensor input_299_cast_fp16 = sub(x = hidden_states_101_cast_fp16, y = mean_135_cast_fp16)[name = string("input_299_cast_fp16")]; tensor var_9670_axes_0 = const()[name = string("op_9670_axes_0"), val = tensor([-1])]; tensor model_model_layers_34_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_34_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939040256)))]; fp16 var_9658_to_fp16 = const()[name = string("op_9658_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9670_cast_fp16 = layer_norm(axes = var_9670_axes_0, epsilon = var_9658_to_fp16, gamma = model_model_layers_34_post_attention_layernorm_weight_to_fp16, x = input_299_cast_fp16)[name = string("op_9670_cast_fp16")]; tensor var_9684 = const()[name = string("op_9684"), val = tensor([0, 2, 1])]; tensor input_301_axes_0 = const()[name = string("input_301_axes_0"), val = tensor([2])]; tensor var_9685 = transpose(perm = var_9684, x = var_9670_cast_fp16)[name = string("transpose_10")]; tensor input_301 = expand_dims(axes = input_301_axes_0, x = var_9685)[name = string("input_301")]; string input_303_pad_type_0 = const()[name = string("input_303_pad_type_0"), val = string("valid")]; tensor input_303_strides_0 = const()[name = string("input_303_strides_0"), val = tensor([1, 1])]; tensor input_303_pad_0 = const()[name = string("input_303_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_303_dilations_0 = const()[name = string("input_303_dilations_0"), val = tensor([1, 1])]; int32 input_303_groups_0 = const()[name = string("input_303_groups_0"), val = int32(1)]; tensor input_303 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_model_layers_34_mlp_gate_proj_weight_palettized, x = input_301)[name = string("input_303")]; string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; tensor b_33 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_34_mlp_up_proj_weight_palettized, x = input_301)[name = string("b_33")]; tensor c_33 = silu(x = input_303)[name = string("c_33")]; tensor input_305 = mul(x = c_33, y = b_33)[name = string("input_305")]; string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; tensor e_33 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_34_mlp_down_proj_weight_palettized, x = input_305)[name = string("e_33")]; tensor var_9707_axes_0 = const()[name = string("op_9707_axes_0"), val = tensor([2])]; tensor var_9707 = squeeze(axes = var_9707_axes_0, x = e_33)[name = string("op_9707")]; tensor var_9708 = const()[name = string("op_9708"), val = tensor([0, 2, 1])]; tensor var_9709 = transpose(perm = var_9708, x = var_9707)[name = string("transpose_9")]; tensor hidden_states_103_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = var_9709)[name = string("hidden_states_103_cast_fp16")]; tensor mean_137_axes_0 = const()[name = string("mean_137_axes_0"), val = tensor([-1])]; bool mean_137_keep_dims_0 = const()[name = string("mean_137_keep_dims_0"), val = bool(true)]; tensor mean_137_cast_fp16 = reduce_mean(axes = mean_137_axes_0, keep_dims = mean_137_keep_dims_0, x = hidden_states_103_cast_fp16)[name = string("mean_137_cast_fp16")]; tensor input_307_cast_fp16 = sub(x = hidden_states_103_cast_fp16, y = mean_137_cast_fp16)[name = string("input_307_cast_fp16")]; tensor var_9727_axes_0 = const()[name = string("op_9727_axes_0"), val = tensor([-1])]; tensor model_model_layers_35_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_35_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939045440)))]; fp16 var_9715_to_fp16 = const()[name = string("op_9715_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9727_cast_fp16 = layer_norm(axes = var_9727_axes_0, epsilon = var_9715_to_fp16, gamma = model_model_layers_35_input_layernorm_weight_to_fp16, x = input_307_cast_fp16)[name = string("op_9727_cast_fp16")]; tensor var_9739 = const()[name = string("op_9739"), val = tensor([0, 2, 1])]; tensor var_9742_axes_0 = const()[name = string("op_9742_axes_0"), val = tensor([2])]; tensor var_9740 = transpose(perm = var_9739, x = var_9727_cast_fp16)[name = string("transpose_8")]; tensor var_9742 = expand_dims(axes = var_9742_axes_0, x = var_9740)[name = string("op_9742")]; string query_states_137_pad_type_0 = const()[name = string("query_states_137_pad_type_0"), val = string("valid")]; tensor query_states_137_strides_0 = const()[name = string("query_states_137_strides_0"), val = tensor([1, 1])]; tensor query_states_137_pad_0 = const()[name = string("query_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_137_dilations_0 = const()[name = string("query_states_137_dilations_0"), val = tensor([1, 1])]; int32 query_states_137_groups_0 = const()[name = string("query_states_137_groups_0"), val = int32(1)]; tensor query_states_137 = conv(dilations = query_states_137_dilations_0, groups = query_states_137_groups_0, pad = query_states_137_pad_0, pad_type = query_states_137_pad_type_0, strides = query_states_137_strides_0, weight = model_model_layers_35_self_attn_q_proj_weight_palettized, x = var_9742)[name = string("query_states_137")]; string key_states_171_pad_type_0 = const()[name = string("key_states_171_pad_type_0"), val = string("valid")]; tensor key_states_171_strides_0 = const()[name = string("key_states_171_strides_0"), val = tensor([1, 1])]; tensor key_states_171_pad_0 = const()[name = string("key_states_171_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_171_dilations_0 = const()[name = string("key_states_171_dilations_0"), val = tensor([1, 1])]; int32 key_states_171_groups_0 = const()[name = string("key_states_171_groups_0"), val = int32(1)]; tensor key_states_171 = conv(dilations = key_states_171_dilations_0, groups = key_states_171_groups_0, pad = key_states_171_pad_0, pad_type = key_states_171_pad_type_0, strides = key_states_171_strides_0, weight = model_model_layers_35_self_attn_k_proj_weight_palettized, x = var_9742)[name = string("key_states_171")]; string value_states_137_pad_type_0 = const()[name = string("value_states_137_pad_type_0"), val = string("valid")]; tensor value_states_137_strides_0 = const()[name = string("value_states_137_strides_0"), val = tensor([1, 1])]; tensor value_states_137_pad_0 = const()[name = string("value_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_137_dilations_0 = const()[name = string("value_states_137_dilations_0"), val = tensor([1, 1])]; int32 value_states_137_groups_0 = const()[name = string("value_states_137_groups_0"), val = int32(1)]; tensor value_states_137 = conv(dilations = value_states_137_dilations_0, groups = value_states_137_groups_0, pad = value_states_137_pad_0, pad_type = value_states_137_pad_type_0, strides = value_states_137_strides_0, weight = model_model_layers_35_self_attn_v_proj_weight_palettized, x = var_9742)[name = string("value_states_137")]; tensor var_9784 = const()[name = string("op_9784"), val = tensor([1, 32, 128, 64])]; tensor var_9785 = reshape(shape = var_9784, x = query_states_137)[name = string("op_9785")]; tensor var_9790 = const()[name = string("op_9790"), val = tensor([0, 1, 3, 2])]; tensor var_9795 = const()[name = string("op_9795"), val = tensor([1, 8, 128, 64])]; tensor var_9796 = reshape(shape = var_9795, x = key_states_171)[name = string("op_9796")]; tensor var_9801 = const()[name = string("op_9801"), val = tensor([0, 1, 3, 2])]; tensor var_9806 = const()[name = string("op_9806"), val = tensor([1, 8, 128, 64])]; tensor var_9807 = reshape(shape = var_9806, x = value_states_137)[name = string("op_9807")]; tensor var_9812 = const()[name = string("op_9812"), val = tensor([0, 1, 3, 2])]; tensor mean_139_axes_0 = const()[name = string("mean_139_axes_0"), val = tensor([-1])]; bool mean_139_keep_dims_0 = const()[name = string("mean_139_keep_dims_0"), val = bool(true)]; tensor x_341 = transpose(perm = var_9790, x = var_9785)[name = string("transpose_7")]; tensor mean_139 = reduce_mean(axes = mean_139_axes_0, keep_dims = mean_139_keep_dims_0, x = x_341)[name = string("mean_139")]; tensor input_311 = sub(x = x_341, y = mean_139)[name = string("input_311")]; tensor var_9829_axes_0 = const()[name = string("op_9829_axes_0"), val = tensor([-1])]; tensor model_model_layers_35_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_35_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939050624)))]; fp16 var_9817_to_fp16 = const()[name = string("op_9817_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9829_cast_fp16 = layer_norm(axes = var_9829_axes_0, epsilon = var_9817_to_fp16, gamma = model_model_layers_35_self_attn_q_norm_weight_to_fp16, x = input_311)[name = string("op_9829_cast_fp16")]; tensor mean_141_axes_0 = const()[name = string("mean_141_axes_0"), val = tensor([-1])]; bool mean_141_keep_dims_0 = const()[name = string("mean_141_keep_dims_0"), val = bool(true)]; tensor x_343 = transpose(perm = var_9801, x = var_9796)[name = string("transpose_6")]; tensor mean_141 = reduce_mean(axes = mean_141_axes_0, keep_dims = mean_141_keep_dims_0, x = x_343)[name = string("mean_141")]; tensor input_313 = sub(x = x_343, y = mean_141)[name = string("input_313")]; tensor var_9847_axes_0 = const()[name = string("op_9847_axes_0"), val = tensor([-1])]; tensor model_model_layers_35_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_35_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939050944)))]; fp16 var_9835_to_fp16 = const()[name = string("op_9835_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9847_cast_fp16 = layer_norm(axes = var_9847_axes_0, epsilon = var_9835_to_fp16, gamma = model_model_layers_35_self_attn_k_norm_weight_to_fp16, x = input_313)[name = string("op_9847_cast_fp16")]; tensor var_9862 = mul(x = var_9829_cast_fp16, y = cos_5)[name = string("op_9862")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 32, 64, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = var_9829_cast_fp16)[name = string("x1_69")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 32, 64, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = var_9829_cast_fp16)[name = string("x2_69")]; fp16 const_381_promoted = const()[name = string("const_381_promoted"), val = fp16(-0x1p+0)]; tensor var_9883 = mul(x = x2_69, y = const_381_promoted)[name = string("op_9883")]; int32 var_9885 = const()[name = string("op_9885"), val = int32(-1)]; bool var_9886_interleave_0 = const()[name = string("op_9886_interleave_0"), val = bool(false)]; tensor var_9886 = concat(axis = var_9885, interleave = var_9886_interleave_0, values = (var_9883, x1_69))[name = string("op_9886")]; tensor var_9887 = mul(x = var_9886, y = sin_5)[name = string("op_9887")]; tensor query_states_139 = add(x = var_9862, y = var_9887)[name = string("query_states_139")]; tensor var_9890 = mul(x = var_9847_cast_fp16, y = cos_5)[name = string("op_9890")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_9847_cast_fp16)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_9847_cast_fp16)[name = string("x2")]; fp16 const_384_promoted = const()[name = string("const_384_promoted"), val = fp16(-0x1p+0)]; tensor var_9911 = mul(x = x2, y = const_384_promoted)[name = string("op_9911")]; int32 var_9913 = const()[name = string("op_9913"), val = int32(-1)]; bool var_9914_interleave_0 = const()[name = string("op_9914_interleave_0"), val = bool(false)]; tensor var_9914 = concat(axis = var_9913, interleave = var_9914_interleave_0, values = (var_9911, x1))[name = string("op_9914")]; tensor var_9915 = mul(x = var_9914, y = sin_5)[name = string("op_9915")]; tensor key_states_173 = add(x = var_9890, y = var_9915)[name = string("key_states_173")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([35])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([36])]; int32 concat_308_axis_0 = const()[name = string("concat_308_axis_0"), val = int32(0)]; bool concat_308_interleave_0 = const()[name = string("concat_308_interleave_0"), val = bool(false)]; tensor concat_308 = concat(axis = concat_308_axis_0, interleave = concat_308_interleave_0, values = (expand_dims_204, expand_dims_205, current_pos, expand_dims_207))[name = string("concat_308")]; tensor concat_309_values1_0 = const()[name = string("concat_309_values1_0"), val = tensor([0])]; tensor concat_309_values3_0 = const()[name = string("concat_309_values3_0"), val = tensor([0])]; int32 concat_309_axis_0 = const()[name = string("concat_309_axis_0"), val = int32(0)]; bool concat_309_interleave_0 = const()[name = string("concat_309_interleave_0"), val = bool(false)]; tensor concat_309 = concat(axis = concat_309_axis_0, interleave = concat_309_interleave_0, values = (expand_dims_208, concat_309_values1_0, var_1233, concat_309_values3_0))[name = string("concat_309")]; tensor model_model_kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_308, begin_mask = model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_309, end_mask = model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_35_stride_0, update = key_states_173, x = coreml_update_state_69)[name = string("model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_70")]; tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([71])]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([0])]; tensor expand_dims_213 = const()[name = string("expand_dims_213"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([72])]; int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (expand_dims_210, expand_dims_211, current_pos, expand_dims_213))[name = string("concat_312")]; tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (expand_dims_214, concat_313_values1_0, var_1233, concat_313_values3_0))[name = string("concat_313")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_139 = transpose(perm = var_9812, x = var_9807)[name = string("transpose_5")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_312, begin_mask = model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_313, end_mask = model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_36_stride_0, update = value_states_139, x = coreml_update_state_70)[name = string("model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_71")]; tensor var_9986_begin_0 = const()[name = string("op_9986_begin_0"), val = tensor([35, 0, 0, 0])]; tensor var_9986_end_0 = const()[name = string("op_9986_end_0"), val = tensor([36, 8, 1024, 128])]; tensor var_9986_end_mask_0 = const()[name = string("op_9986_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9986_cast_fp16 = slice_by_index(begin = var_9986_begin_0, end = var_9986_end_0, end_mask = var_9986_end_mask_0, x = coreml_update_state_71)[name = string("op_9986_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_9986_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_9993_begin_0 = const()[name = string("op_9993_begin_0"), val = tensor([71, 0, 0, 0])]; tensor var_9993_end_0 = const()[name = string("op_9993_end_0"), val = tensor([1, 8, 1024, 128])]; tensor var_9993_end_mask_0 = const()[name = string("op_9993_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9993_cast_fp16 = slice_by_index(begin = var_9993_begin_0, end = var_9993_end_0, end_mask = var_9993_end_mask_0, x = coreml_update_state_71)[name = string("op_9993_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_9993_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_347_axes_0 = const()[name = string("x_347_axes_0"), val = tensor([1])]; tensor x_347_cast_fp16 = expand_dims(axes = x_347_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_347_cast_fp16")]; tensor var_10022 = const()[name = string("op_10022"), val = tensor([1, 4, 1, 1])]; tensor x_349_cast_fp16 = tile(reps = var_10022, x = x_347_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_10034 = const()[name = string("op_10034"), val = tensor([1, -1, 1024, 128])]; tensor key_states_177_cast_fp16 = reshape(shape = var_10034, x = x_349_cast_fp16)[name = string("key_states_177_cast_fp16")]; tensor x_353_axes_0 = const()[name = string("x_353_axes_0"), val = tensor([1])]; tensor x_353_cast_fp16 = expand_dims(axes = x_353_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_353_cast_fp16")]; tensor var_10042 = const()[name = string("op_10042"), val = tensor([1, 4, 1, 1])]; tensor x_355_cast_fp16 = tile(reps = var_10042, x = x_353_cast_fp16)[name = string("x_355_cast_fp16")]; bool var_10069_transpose_x_0 = const()[name = string("op_10069_transpose_x_0"), val = bool(false)]; bool var_10069_transpose_y_0 = const()[name = string("op_10069_transpose_y_0"), val = bool(true)]; tensor var_10069 = matmul(transpose_x = var_10069_transpose_x_0, transpose_y = var_10069_transpose_y_0, x = query_states_139, y = key_states_177_cast_fp16)[name = string("op_10069")]; fp16 var_10070_to_fp16 = const()[name = string("op_10070_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_69_cast_fp16 = mul(x = var_10069, y = var_10070_to_fp16)[name = string("attn_weights_69_cast_fp16")]; tensor attn_weights_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask)[name = string("attn_weights_cast_fp16")]; int32 var_10105 = const()[name = string("op_10105"), val = int32(-1)]; tensor var_10107_cast_fp16 = softmax(axis = var_10105, x = attn_weights_cast_fp16)[name = string("op_10107_cast_fp16")]; tensor concat_318 = const()[name = string("concat_318"), val = tensor([32, 64, 1024])]; tensor reshape_51_cast_fp16 = reshape(shape = concat_318, x = var_10107_cast_fp16)[name = string("reshape_51_cast_fp16")]; tensor concat_319 = const()[name = string("concat_319"), val = tensor([32, 1024, 128])]; tensor reshape_52_cast_fp16 = reshape(shape = concat_319, x = x_355_cast_fp16)[name = string("reshape_52_cast_fp16")]; bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(false)]; tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = reshape_51_cast_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; tensor concat_323 = const()[name = string("concat_323"), val = tensor([1, 32, 64, 128])]; tensor reshape_53_cast_fp16 = reshape(shape = concat_323, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; tensor var_10119_perm_0 = const()[name = string("op_10119_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10138 = const()[name = string("op_10138"), val = tensor([1, 64, 4096])]; tensor var_10119_cast_fp16 = transpose(perm = var_10119_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_4")]; tensor attn_output_175_cast_fp16 = reshape(shape = var_10138, x = var_10119_cast_fp16)[name = string("attn_output_175_cast_fp16")]; tensor var_10143 = const()[name = string("op_10143"), val = tensor([0, 2, 1])]; string var_10159_pad_type_0 = const()[name = string("op_10159_pad_type_0"), val = string("valid")]; int32 var_10159_groups_0 = const()[name = string("op_10159_groups_0"), val = int32(1)]; tensor var_10159_strides_0 = const()[name = string("op_10159_strides_0"), val = tensor([1])]; tensor var_10159_pad_0 = const()[name = string("op_10159_pad_0"), val = tensor([0, 0])]; tensor var_10159_dilations_0 = const()[name = string("op_10159_dilations_0"), val = tensor([1])]; tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939051264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944294208))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_10144_cast_fp16 = transpose(perm = var_10143, x = attn_output_175_cast_fp16)[name = string("transpose_3")]; tensor var_10159_cast_fp16 = conv(dilations = var_10159_dilations_0, groups = var_10159_groups_0, pad = var_10159_pad_0, pad_type = var_10159_pad_type_0, strides = var_10159_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_10144_cast_fp16)[name = string("op_10159_cast_fp16")]; tensor var_10163 = const()[name = string("op_10163"), val = tensor([0, 2, 1])]; tensor attn_output_cast_fp16 = transpose(perm = var_10163, x = var_10159_cast_fp16)[name = string("transpose_2")]; tensor hidden_states_107_cast_fp16 = add(x = hidden_states_103_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor mean_143_axes_0 = const()[name = string("mean_143_axes_0"), val = tensor([-1])]; bool mean_143_keep_dims_0 = const()[name = string("mean_143_keep_dims_0"), val = bool(true)]; tensor mean_143_cast_fp16 = reduce_mean(axes = mean_143_axes_0, keep_dims = mean_143_keep_dims_0, x = hidden_states_107_cast_fp16)[name = string("mean_143_cast_fp16")]; tensor input_317_cast_fp16 = sub(x = hidden_states_107_cast_fp16, y = mean_143_cast_fp16)[name = string("input_317_cast_fp16")]; tensor var_10182_axes_0 = const()[name = string("op_10182_axes_0"), val = tensor([-1])]; tensor model_model_layers_35_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_35_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944304512)))]; fp16 var_10170_to_fp16 = const()[name = string("op_10170_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10182_cast_fp16 = layer_norm(axes = var_10182_axes_0, epsilon = var_10170_to_fp16, gamma = model_model_layers_35_post_attention_layernorm_weight_to_fp16, x = input_317_cast_fp16)[name = string("op_10182_cast_fp16")]; tensor var_10196 = const()[name = string("op_10196"), val = tensor([0, 2, 1])]; tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; tensor var_10197 = transpose(perm = var_10196, x = var_10182_cast_fp16)[name = string("transpose_1")]; tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_10197)[name = string("input_319")]; string input_321_pad_type_0 = const()[name = string("input_321_pad_type_0"), val = string("valid")]; tensor input_321_strides_0 = const()[name = string("input_321_strides_0"), val = tensor([1, 1])]; tensor input_321_pad_0 = const()[name = string("input_321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_321_dilations_0 = const()[name = string("input_321_dilations_0"), val = tensor([1, 1])]; int32 input_321_groups_0 = const()[name = string("input_321_groups_0"), val = int32(1)]; tensor input_321 = conv(dilations = input_321_dilations_0, groups = input_321_groups_0, pad = input_321_pad_0, pad_type = input_321_pad_type_0, strides = input_321_strides_0, weight = model_model_layers_35_mlp_gate_proj_weight_palettized, x = input_319)[name = string("input_321")]; string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_35_mlp_up_proj_weight_palettized, x = input_319)[name = string("b")]; tensor c = silu(x = input_321)[name = string("c")]; tensor input_323 = mul(x = c, y = b)[name = string("input_323")]; string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; tensor e = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_35_mlp_down_proj_weight_palettized, x = input_323)[name = string("e")]; tensor var_10219_axes_0 = const()[name = string("op_10219_axes_0"), val = tensor([2])]; tensor var_10219 = squeeze(axes = var_10219_axes_0, x = e)[name = string("op_10219")]; tensor var_10220 = const()[name = string("op_10220"), val = tensor([0, 2, 1])]; tensor var_10221 = transpose(perm = var_10220, x = var_10219)[name = string("transpose_0")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = var_10221)[name = string("hidden_states_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_cast_fp16 = sub(x = hidden_states_cast_fp16, y = mean_cast_fp16)[name = string("input_cast_fp16")]; tensor var_10239_axes_0 = const()[name = string("op_10239_axes_0"), val = tensor([-1])]; tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944309696)))]; fp16 var_10227_to_fp16 = const()[name = string("op_10227_to_fp16"), val = fp16(0x1.1p-20)]; tensor output_hidden_states = layer_norm(axes = var_10239_axes_0, epsilon = var_10227_to_fp16, gamma = model_model_norm_weight_to_fp16, x = input_cast_fp16)[name = string("op_10239_cast_fp16")]; } -> (output_hidden_states); }